Example #1
def run(img):
    # Save the received image and convert it
    img_dir = "./imgs/input.png"
    result_dir = "./result/output.png"

    img.save(img_dir)

    # Convert
    seg.segmentation()

    output = Image.open(result_dir)

    # Delete the images after checking they exist
    if os.path.isfile(img_dir):
        os.remove(img_dir)
    if os.path.isfile(result_dir):
        os.remove(result_dir)

    byte_io = BytesIO()
    output.save(byte_io, "PNG")
    byte_io.seek(0)

    return byte_io
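
A minimal usage sketch for run() above, assuming the module-level imports the snippet implies (PIL's Image, os, BytesIO, and a seg object) and that the ./imgs and ./result directories already exist; the file names here are illustrative:

from PIL import Image

img = Image.open("sample.jpg")          # any PIL image
png_stream = run(img)                   # BytesIO holding the PNG result
with open("segmented.png", "wb") as f:
    f.write(png_stream.read())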
Example #2
def update_oa(words, progress):

    #if progress['oa'] > 0:
    #

    oa_dicts = []

    # Read the ids
    with open(r'data\oa_id.txt', "r", encoding="utf-8") as f:
        ids = f.read().split()

    for id in ids[1:]:
        oa_dict = {}
        print(id)
        # Read the local article for this id
        path = os.path.join('data', 'reports', id + '.txt')
        with open(path, "r", encoding='utf-8') as f:
            title = f.readline().split()
            article = f.read()

        # Word segmentation
        seg = segmentation(article.split())

        word_count = Counter(seg)
        #print(word_count)
        wordn = sum(word_count.values())
        for word in word_count:
            word_count[word] = word_count[word] / wordn
        #print(word_count)

        words += Counter(word_count.keys())

        oa_dict['id'] = id

        oa_dict['seg'] = word_count

        oa_dict['title'] = title

        oa_dict['text'] = article

        oa_dicts.append(oa_dict)

    oa_df = pd.DataFrame(oa_dicts)

    print(oa_df)

    oa_df.to_json('data/seg/articles_oa.json')

    progress['oa'] = len(oa_dicts)
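
The normalization above (each raw count divided by the article's total token count) is the term-frequency step of a TF-IDF-style scheme, while `words += Counter(word_count.keys())` accumulates per-article document frequencies. A standalone sketch of the frequency step, with a hypothetical name:

from collections import Counter

def term_frequencies(tokens):
    # Relative frequency of each token: raw count / total token count.
    counts = Counter(tokens)
    total = sum(counts.values())
    return {word: count / total for word, count in counts.items()}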
Example #3
def gen_mask(image_path):
    """
    Generate semantic mask
    """
    seg_result = segmentation(image_path).squeeze(0)
    channel, height_, width_ = seg_result.size()

    # Merge related classes: OR every map in a group into the channel
    # of the group's first class index.
    for classes in merge_classes:
        for index, each_class in enumerate(classes):
            if index == 0:
                zeros_index = each_class
                base_map = seg_result[each_class, :, :].clone()
            else:
                base_map = base_map | seg_result[each_class, :, :]
        seg_result[zeros_index, :, :] = base_map

    return seg_result, height_, width_
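
A usage sketch, assuming segmentation() returns a 1 x C x H x W tensor of per-class binary maps and that merge_classes is a module-level list of class-index groups, as the code implies; the class indices below are illustrative only:

# Hypothetical grouping: fold channel 13 into channel 12.
merge_classes = [[12, 13]]
mask, height, width = gen_mask("street.jpg")
merged_map = mask[12]   # channel 12 now holds the OR of both maps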
Example #4
# -*- coding: utf-8 -*-
from pymongo import MongoClient
from seg import segmentation

def conDB(collection):
    # Authenticate at connection time; Database.authenticate was
    # removed in PyMongo 4.
    client = MongoClient('int-db01.elana.org', 27017,
                         username="las_dev", password="DB41as-1",
                         authSource="las_dev")
    db = client['las_dev']
    return db[collection]


seg = segmentation()

collection_name = 'discuss_news3'
post = conDB(collection_name)
query = {'created_time': {'$gte': 1475251200, '$lt': 1477929600}}
objs = post.find(query)

# Segment each title and write the tokens back onto the document.
for obj in objs:
    content = obj['title']
    seg_text = seg.segText(content)
    post.update_one({'_id': obj['_id']}, {'$set': {'title_seg': seg_text}})

print('end')
Example #5
def update_bug(words, progress, updaterate):

    startid = progress['bug']

    if startid > 0:
        bug_p = pd.read_json('data/pms_bug.json')
    else:
        bug_p = pd.DataFrame()

    while True:

        # sleep(1)

        result_dict = es.search(index="pms_bug",
                                body={
                                    "query": {
                                        "constant_score": {
                                            "filter": {
                                                "range": {
                                                    "id": {
                                                        "gt": startid,
                                                        "lt": startid + updaterate + 1
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                size=10000)

        if len(result_dict['hits']['hits']) == 0:
            break

        bug_new = Select.from_dict(result_dict).to_pandas()[[
            'id', 'title', 'steps'
        ]]

        bug_new = bug_new.loc[bug_new['id'].drop_duplicates().index, :]

        bug_new = bug_new.set_index(bug_new['id'].values)

        bug_new['seg'] = ''
        for i in bug_new['id'].values:
            #print(i)

            seg = segmentation(
                (bug_new['title'][i] + bug_new['steps'][i]).split())
            word_count = Counter(seg)
            #print(word_count)
            wordn = sum(word_count.values())
            for word in word_count:
                word_count[word] = word_count[word] / wordn
            #print(word_count)

            words += Counter(word_count.keys())
            # .at avoids pandas chained-assignment issues when writing cells.
            bug_new.at[i, 'seg'] = dict(word_count)

        bug_p = pd.concat([bug_p, bug_new], axis=0)

        startid = np.sort(bug_new['id'].values)[-1]
        print(startid)

    progress['bug'] = int(startid)

    bug_p.to_json('data/seg/pms_bug.json')
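
The while loop pages through the index in (startid, startid + updaterate] windows, advancing startid to the largest id seen, until a window returns no hits. A condensed sketch of that pattern with hypothetical names, assuming an elasticsearch-py client:

def iter_id_windows(es, index, field, start, step, page_size=10000):
    # Yield raw hits window by window until a window comes back empty.
    while True:
        body = {"query": {"constant_score": {"filter": {
            "range": {field: {"gt": start, "lt": start + step + 1}}}}}}
        hits = es.search(index=index, body=body, size=page_size)['hits']['hits']
        if not hits:
            return
        yield from hits
        start = max(hit['_source'][field] for hit in hits)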
Example #6
def update_story(words, progress, updaterate):

    startid = progress['story']

    if startid > 0:
        story_p = pd.read_json('data/pms_story.json')
    else:
        story_p = pd.DataFrame()

    while True:

        #sleep(1)

        result_dict = es.search(index="pms_story",
                                body={
                                    "query": {
                                        "constant_score": {
                                            "filter": {
                                                "range": {
                                                    "story": {
                                                        "gt": startid,
                                                        "lt": startid + updaterate + 1
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                size=10000)

        if len(result_dict['hits']['hits']) == 0:
            break

        story_new = Select.from_dict(result_dict).to_pandas()[[
            'story', 'title', 'spec'
        ]]

        # sql = 'select story, title, spec from zt_storyspec where story > {0} limit {1}'.format(startid, updaterate)
        # story_new = pd.read_sql(sql, engine)
        # ifremain = (len(story_new) == updaterate)
        # print(len(story_new))
        #
        # print(ifremain)

        # from nohtml import strip_tags
        # story_new['spec'] = story_new['spec'].apply(lambda x:strip_tags(x))
        story_new = story_new.loc[
            story_new['story'].drop_duplicates().index, :]

        story_new = story_new.set_index(story_new['story'].values)

        story_new['seg'] = ''
        for i in story_new['story'].values:

            seg = segmentation(
                (story_new['title'][i] + story_new['spec'][i]).split())
            word_count = Counter(seg)
            print(i)
            print(word_count)
            wordn = sum(word_count.values())
            for word in word_count:
                word_count[word] = word_count[word] / wordn
            #print(word_count)

            words += Counter(word_count.keys())
            story_new.at[i, 'seg'] = dict(word_count)

        story_p = pd.concat([story_p, story_new], axis=0)

        startid = np.sort(story_new['story'].values)[-1]

        print(startid)

    progress['story'] = int(startid)

    story_p.to_json('data/seg/pms_story.json')
Example #7
def update_task(words, progress, updaterate):

    startid = progress['task']

    if startid > 0:
        task_p = pd.read_json('data/pms_task.json')
    else:
        task_p = pd.DataFrame()

    while True:

        # sleep(1)

        result_dict = es.search(index="pms_task",
                                body={
                                    "query": {
                                        "constant_score": {
                                            "filter": {
                                                "range": {
                                                    "id": {
                                                        "gt": startid,
                                                        "lt": startid + updaterate + 1
                                                    }
                                                }
                                            }
                                        }
                                    }
                                },
                                size=10000)

        if len(result_dict['hits']['hits']) == 0:
            break

        task_new = Select.from_dict(result_dict).to_pandas()[[
            'name', 'id', 'desc'
        ]]

        # from nohtml import strip_tags
        # task_new['desc'] = task_new['desc'].apply(lambda x:strip_tags(x))
        task_new = task_new.loc[task_new['id'].drop_duplicates().index, :]

        task_new = task_new.set_index(task_new['id'].values)

        #task_new['id'] = task_new['id'].astype(str)

        task_new['seg'] = ''

        for i in task_new['id'].values:
            print(i)

            seg = segmentation(
                (task_new['name'][i] + task_new['desc'][i]).split())
            word_count = Counter(seg)
            print(word_count)
            wordn = sum(word_count.values())
            for word in word_count:
                word_count[word] = word_count[word] / wordn
            print(word_count)

            words += Counter(word_count.keys())
            task_new.at[i, 'seg'] = dict(word_count)

        task_p = pd.concat([task_p, task_new], axis=0)

        startid = np.sort(task_new['id'].values)[-1]

    progress['task'] = int(startid)

    task_p.to_json('data/seg/pms_task.json')


def update_confluence(words, progress):

    confluence = Confluence(url='http://docs.fscut.com',
                            username='******',
                            password='******')

    #if progress['oa'] > 0:
    #

    confluence_dicts = []

    # Read the ids
    with open(r'data\id_confluence.txt', "r", encoding="utf-8") as f:
        ids = f.read().split()

    for id in ids[1:]:

        # Fetch the page info from the API by id
        page_info = confluence.get_page_by_id(
            id, expand='space,body.view,version,container')
        content = page_info['body']['view']['value']
        soup = BeautifulSoup(content, 'lxml')
        article = soup.text
        title = page_info['title']

        if len(title) < 15:
            continue

        confluence_dict = {}
        print(id)

        # Read the local article by id
        #path = os.path.join('data','reports',  id + '.txt')
        # with open(path, "r", encoding='utf-8') as f:
        #     title = f.readline().split()
        #     article = f.read()

        # Word segmentation
        seg = segmentation(article.split())

        print(seg)

        word_count = Counter(seg)
        #print(word_count)
        wordn = sum(word_count.values())
        for word in word_count:
            word_count[word] = word_count[word] / wordn
        #print(word_count)

        words += Counter(word_count.keys())

        confluence_dict['id'] = id

        confluence_dict['seg'] = word_count

        confluence_dict['title'] = title

        confluence_dict['text'] = article

        confluence_dict['date'] = title.split(' ')[0]
        print(confluence_dict['date'])

        # Parse the author from the title
        confluence_dict['author'] = re.split('[(-]', title)[-2]

        print(confluence_dict['author'])

        confluence_dict['url'] = 'http://docs.fscut.com/pages/viewpage.action?pageId=' + id

        confluence_dicts.append(confluence_dict)

    confluence_df = pd.DataFrame(confluence_dicts)

    print(confluence_df)

    confluence_df.to_json('data/seg/articles_confluence.json')

    progress['confluence'] = len(confluence_dicts)
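
The date and author fields above are parsed purely from the page title, so this only works when titles follow a fixed naming convention. That convention is not shown in this snippet; the title below is a hypothetical one that satisfies both parses:

import re

# Hypothetical title of the form "<date> <topic>-<author>-<suffix>".
title = "2020-10-01 weekly report-Zhang San-final"
date = title.split(' ')[0]             # -> '2020-10-01'
author = re.split('[(-]', title)[-2]   # -> 'Zhang San'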