import os
from io import BytesIO

from PIL import Image

import seg  # assumed local module exposing segmentation(); reads ./imgs/input.png, writes ./result/output.png


def run(img):
    # Save the received image, then run the conversion
    img_dir = "./imgs/input.png"
    result_dir = "./result/output.png"
    img.save(img_dir)
    # Convert
    seg.segmentation()
    output = Image.open(result_dir)
    output.load()  # force PIL to read the pixel data now, before the file is removed below
    # Check that the files exist, then delete them
    if os.path.isfile(img_dir):
        os.remove(img_dir)
    if os.path.isfile(result_dir):
        os.remove(result_dir)
    byte_io = BytesIO()
    output.save(byte_io, "PNG")
    byte_io.seek(0)
    return byte_io
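# Hypothetical usage sketch, not part of the original handler: it assumes run()
# above is in scope and that seg.segmentation() reads ./imgs/input.png and writes
# ./result/output.png as run() expects. The file names below are made up.
if __name__ == "__main__":
    source = Image.open("example.png")        # any image PIL can read
    png_stream = run(source)                  # in-memory PNG of the converted result
    with open("converted.png", "wb") as out:
        out.write(png_stream.getvalue())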
# Assumes module-level imports from the original project: os, pandas as pd,
# collections.Counter, and the project's segmentation() tokenizer.
def update_oa(words, progress):
    #if progress['oa'] > 0:
    oa_dicts = []
    # Read the article IDs
    with open(r'data\oa_id.txt', "r", encoding="utf-8") as f:
        ids = f.read().split()
    for id in ids[1:]:
        oa_dict = {}
        print(id)
        # Read the local article for this id
        path = os.path.join('data', 'reports', id + '.txt')
        with open(path, "r", encoding='utf-8') as f:
            title = f.readline().split()
            article = f.read()
        # Word segmentation
        seg = segmentation(article.split())
        word_count = Counter(seg)
        #print(word_count)
        wordn = sum(word_count.values())
        for word in word_count:
            word_count[word] = word_count[word] / wordn
        #print(word_count)
        words += Counter(word_count.keys())
        oa_dict['id'] = id
        oa_dict['seg'] = word_count
        oa_dict['title'] = title
        oa_dict['text'] = article
        oa_dicts.append(oa_dict)
    oa_df = pd.DataFrame(oa_dicts)
    print(oa_df)
    oa_df.to_json('data/seg/articles_oa.json')
    progress['oa'] = len(oa_dicts)
def gen_mask(image_path):
    """ Generate semantic mask """
    seg_result = segmentation(image_path).squeeze(0)
    channel, height_, width_ = seg_result.size()
    for classes in merge_classes:
        for index, each_class in enumerate(classes):
            if index == 0:
                zeros_index = each_class
                base_map = seg_result[each_class, :, :].clone()
            else:
                base_map = base_map | seg_result[each_class, :, :]
        # fold the whole group into its first channel
        seg_result[zeros_index, :, :] = base_map
    return seg_result, height_, width_
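# Hypothetical call, for illustration only: segmentation() and merge_classes are
# module-level in the original project; the file name and shapes are assumptions.
# gen_mask() is expected to return a (C, H, W) tensor of per-class maps in which
# each group listed in merge_classes has been merged into its first channel.
mask, height, width = gen_mask("street.jpg")
print(mask.size(), height, width)              # e.g. torch.Size([C, H, W]), H, W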
# -*- coding: utf-8 -*-
# Legacy Python 2 / old PyMongo script: Database.authenticate() and
# Collection.update() are removed in PyMongo 4.
from pymongo import MongoClient

from seg import segmentation


def conDB(collection):
    client = MongoClient('int-db01.elana.org', 27017)
    db_auth = client['las_dev']
    db_auth.authenticate("las_dev", "DB41as-1")
    db = client['las_dev']
    curs = db[collection]
    return curs


seg = segmentation()
collection_name = 'discuss_news3'
post = conDB(collection_name)
query = {'created_time': {'$gte': 1475251200, '$lt': 1477929600}}
objs = post.find(query)
for obj in objs:
    content = obj['title']
    seg_text = seg.segText(content)
    post.update({'_id': obj['_id']}, {'$set': {'title_seg': seg_text}})
print 'end'
# Relies on module-level objects from the original project: es (Elasticsearch
# client), Select (pandasticsearch), np, pd, Counter and segmentation().
def update_bug(words, progress, updaterate):
    startid = progress['bug']
    if startid > 0:
        bug_p = pd.read_json('data/pms_bug.json')
    else:
        bug_p = pd.DataFrame()
    while True:
        # sleep(1)
        result_dict = es.search(
            index="pms_bug",
            body={
                "query": {
                    "constant_score": {
                        "filter": {
                            "range": {
                                "id": {
                                    "gt": startid,
                                    "lt": startid + updaterate + 1
                                }
                            }
                        }
                    }
                }
            },
            size=10000)
        if len(result_dict['hits']['hits']) == 0:
            break
        bug_new = Select.from_dict(result_dict).to_pandas()[[
            'id', 'title', 'steps'
        ]]
        bug_new = bug_new.loc[bug_new['id'].drop_duplicates().index, :]
        bug_new = bug_new.set_index(bug_new['id'].values)
        bug_new['seg'] = ''
        for i in bug_new['id'].values:
            #print(i)
            seg = segmentation(
                (bug_new['title'][i] + bug_new['steps'][i]).split())
            word_count = Counter(seg)
            #print(word_count)
            wordn = sum(word_count.values())
            for word in word_count:
                word_count[word] = word_count[word] / wordn
            #print(word_count)
            words += Counter(word_count.keys())
            bug_new['seg'][i] = dict(word_count)
        bug_p = pd.concat([bug_p, bug_new], axis=0)
        startid = np.sort(bug_new['id'].values)[-1]
        print(startid)
    progress['bug'] = int(startid)
    bug_p.to_json('data/seg/pms_bug.json')
def update_story(words, progress, updaterate):
    startid = progress['story']
    if startid > 0:
        story_p = pd.read_json('data/pms_story.json')
    else:
        story_p = pd.DataFrame()
    while True:
        #sleep(1)
        result_dict = es.search(
            index="pms_story",
            body={
                "query": {
                    "constant_score": {
                        "filter": {
                            "range": {
                                "story": {
                                    "gt": startid,
                                    "lt": startid + updaterate + 1
                                }
                            }
                        }
                    }
                }
            },
            size=10000)
        if len(result_dict['hits']['hits']) == 0:
            break
        story_new = Select.from_dict(result_dict).to_pandas()[[
            'story', 'title', 'spec'
        ]]
        # sql = 'select story, title, spec from zt_storyspec where story > {0} limit {1}'.format(startid, updaterate)
        # story_new = pd.read_sql(sql, engine)
        # ifremain = (len(story_new) == updaterate)
        # print(len(story_new))
        # # print(ifremain)
        # from nohtml import strip_tags
        # story_new['spec'] = story_new['spec'].apply(lambda x:strip_tags(x))
        story_new = story_new.loc[
            story_new['story'].drop_duplicates().index, :]
        story_new = story_new.set_index(story_new['story'].values)
        story_new['seg'] = ''
        for i in story_new['story'].values:
            seg = segmentation(
                (story_new['title'][i] + story_new['spec'][i]).split())
            word_count = Counter(seg)
            print(i)
            print(word_count)
            wordn = sum(word_count.values())
            for word in word_count:
                word_count[word] = word_count[word] / wordn
            #print(word_count)
            words += Counter(word_count.keys())
            story_new['seg'][i] = dict(word_count)
        story_p = pd.concat([story_p, story_new], axis=0)
        startid = np.sort(story_new['story'].values)[-1]
        print(startid)
    progress['story'] = int(startid)
    story_p.to_json('data/seg/pms_story.json')
def update_task(words, progress, updaterate):
    startid = progress['task']
    if startid > 0:
        task_p = pd.read_json('data/pms_task.json')
    else:
        task_p = pd.DataFrame()
    while True:
        # sleep(1)
        result_dict = es.search(
            index="pms_task",
            body={
                "query": {
                    "constant_score": {
                        "filter": {
                            "range": {
                                "id": {
                                    "gt": startid,
                                    "lt": startid + updaterate + 1
                                }
                            }
                        }
                    }
                }
            },
            size=10000)
        if len(result_dict['hits']['hits']) == 0:
            break
        task_new = Select.from_dict(result_dict).to_pandas()[[
            'name', 'id', 'desc'
        ]]
        # from nohtml import strip_tags
        # task_new['desc'] = task_new['desc'].apply(lambda x:strip_tags(x))
        task_new = task_new.loc[task_new['id'].drop_duplicates().index, :]
        task_new = task_new.set_index(task_new['id'].values)
        #task_new['id'] = task_new['id'].astype(str)
        task_new['seg'] = ''
        for i in task_new['id'].values:
            print(i)
            seg = segmentation(
                (task_new['name'][i] + task_new['desc'][i]).split())
            word_count = Counter(seg)
            print(word_count)
            wordn = sum(word_count.values())
            for word in word_count:
                word_count[word] = word_count[word] / wordn
            print(word_count)
            words += Counter(word_count.keys())
            task_new['seg'][i] = dict(word_count)
        task_p = pd.concat([task_p, task_new], axis=0)
        startid = np.sort(task_new['id'].values)[-1]
    progress['task'] = int(startid)
    task_p.to_json('data/seg/pms_task.json')
def update_confluence(words, progress):
    confluence = Confluence(url='http://docs.fscut.com',
                            username='******',
                            password='******')
    #if progress['oa'] > 0:
    confluence_dicts = []
    # Read the page IDs
    with open(r'data\id_confluence.txt', "r", encoding="utf-8") as f:
        ids = f.read().split()
    for id in ids[1:]:
        # Fetch the page from the Confluence API by id
        page_info = confluence.get_page_by_id(
            id, expand='space,body.view,version,container')
        content = page_info['body']['view']['value']
        soup = BeautifulSoup(content, 'lxml')
        article = soup.text
        title = page_info['title']
        if len(title) < 15:
            continue
        confluence_dict = {}
        print(id)
        # Earlier local-file variant, kept for reference:
        #path = os.path.join('data','reports', id + '.txt')
        # with open(path, "r", encoding='utf-8') as f:
        #     title = f.readline().split()
        #     article = f.read()
        # Word segmentation
        seg = segmentation(article.split())
        print(seg)
        word_count = Counter(seg)
        #print(word_count)
        wordn = sum(word_count.values())
        for word in word_count:
            word_count[word] = word_count[word] / wordn
        #print(word_count)
        words += Counter(word_count.keys())
        confluence_dict['id'] = id
        confluence_dict['seg'] = word_count
        confluence_dict['title'] = title
        confluence_dict['text'] = article
        confluence_dict['date'] = title.split(' ')[0]
        print(confluence_dict['date'])
        # Parse the author from the title
        confluence_dict['author'] = re.split('[(-]', title)[-2]
        print(confluence_dict['author'])
        confluence_dict[
            'url'] = 'http://docs.fscut.com/pages/viewpage.action?pageId=' + id
        confluence_dicts.append(confluence_dict)
    confluence_df = pd.DataFrame(confluence_dicts)
    print(confluence_df)
    confluence_df.to_json('data/seg/articles_confluence.json')
    progress['confluence'] = len(confluence_dicts)