def merge_file(filename):
    data = Data(filename)
    output = data.get('OUTPUT')
    collections = data.collections()
    merge = Merge(collections)
    generator = merge.generator()
    return (generator, output)
def main():
    url = "http://www.occ.gov/topics/licensing/interpretations-and-actions/index-interpretations-and-actions.html"
    r1 = Range98_96(url)
    r2 = Range04_99(url)
    r3 = Range05_10(url)
    r4 = Range11_12(url)
    m = Merge()
    print " WEB SCRAPING "
    print " ============================= "
    print "Fetching the url for range 1996-1998:"
    r1.fetchurl98_96()
    print "Fetching the url for range 1999-2004:"
    r2.fetchurl04_99()
    print "Fetching the url for range 2005-2010:"
    r3.fetchurl05_10()
    print "Fetching the url for range 2011-2012:"
    r4.fetchurl11_12()
    print "Fetching data from 1996-1998"
    r1.fetchdata98_96()
    print "Fetching data from 1999-2004"
    r2.fetchdata04_99()
    print "Fetching data from 2005-2010"
    r3.fetchdata05_10()
    print "Fetching data from 2011-2012"
    r4.fetchdata11_12()
    print "Merging all the csv files"
    m.call()
    print "Check the output CSV file (the required database)"
    print " *************************************** "
def task_merge_doubanvideo():
    m = Merge()
    while True:
        task = rd.spop("task_merge_doubanvideo")
        if task:
            m.merge_doubanvideo(query={"_id": ObjectId(task)})
        else:
            break
def task_merge_youku_videos():
    m = Merge()
    while True:
        task = rd.spop("task_merge_youku_videos")
        if task:
            m.merge_youku_videos(query={"_id": ObjectId(task)})
        else:
            break
def task_merge_letvstar():
    m = Merge()
    while True:
        task = rd.spop("task_merge_letvstar")
        if task:
            m.merge_letvstar(query={"_id": ObjectId(task)})
        else:
            break
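# The three workers above share one pattern: drain a Redis set of ObjectId
# strings and hand each id to a Merge method. A minimal generic sketch of that
# pattern (a hypothetical helper; rd, Merge, and ObjectId are assumed to be
# the same objects the workers above use):
def drain_merge_tasks(set_name, merge_method_name):
    m = Merge()
    method = getattr(m, merge_method_name)
    while True:
        task = rd.spop(set_name)  # returns None once the set is empty
        if not task:
            break
        method(query={"_id": ObjectId(task)})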
def test_range(self):
    cnt = 5
    lists = [iter(xrange(sys.maxint)) for _ in range(cnt)]
    merge = Merge(lists)
    generator = merge.generator()
    for value in range(10):
        for _ in range(cnt):
            assert next(generator) == value
def test_infinity(self):
    def infinity(value):
        while True:
            yield value

    merge = Merge((infinity(1), infinity(2)))
    generator = merge.generator()
    assert next(generator) == 1
def verify_fileds(self):
    # Check whether the required fields are filled in correctly;
    # show an error message if they are not.
    invisbale_button = self.invisbale_button.get()
    merge_button = self.merge_button.get()
    icon_button = self.icon_button.get()
    filename = self.entry_var.get()
    icon = self.icon.get()
    if filename.endswith(('.py', '.pyw')):
        if invisbale_button and merge_button and icon_button:
            # Calling the Ain1 class
            if icon.endswith('.ico'):
                a1 = Ain1(self.master, filename, icon)
                a1.merge()
            else:
                messagebox.showerror(parent=self.master, title='Icon Error',
                                     message="Icon file should end with a (.ico) extension")
        elif invisbale_button and merge_button:
            # Calling the Inmerge class
            inmerge = Inmerge(self.master, filename)
            inmerge.merge()
        elif invisbale_button and icon_button:
            # Calling the Inicon class
            if icon.endswith('.ico'):
                inicon = Inicon(self.master, filename, icon)
                inicon.merge()
            else:
                messagebox.showerror(parent=self.master, title='Icon Error',
                                     message="Icon file should end with a (.ico) extension")
        elif merge_button and icon_button:
            # Calling the MergeIcon class
            if icon.endswith('.ico'):
                mergeicon = MergeIcon(self.master, filename, icon)
                mergeicon.merge()
            else:
                messagebox.showerror(parent=self.master, title='Icon Error',
                                     message="Icon file should end with a (.ico) extension")
        elif invisbale_button:
            # Calling the Hidden class
            hidden = Hidden(self.master, filename)
            hidden.merge()
        elif merge_button:
            # Calling the Merge class
            merge = Merge(self.master, filename)
            merge.merge()
        elif icon_button:
            messagebox.showerror(parent=self.master, title='Icon Error',
                                 message="An icon cannot be merged on its own")
        else:
            messagebox.showerror(parent=self.master, title='Invalid Error',
                                 message="Please select at least merge")
    else:
        messagebox.showerror(parent=self.master, title='Filename Error',
                             message="File should end with a (.py or .pyw) extension.")
def selBtnEvent(self):
    for widget in self.workFrm.winfo_children():
        widget.destroy()
    if self.selV.get() == 1:
        Merge(self)
    if self.selV.get() == 2:
        Split(self)
    if self.selV.get() == 3:
        Insert(self)
    if self.selV.get() == 4:
        Delete(self)
def create_objects(self, test_directory):
    """Traverses the XML structure and creates test objects.

    Arguments:
    test_directory -- the test directory.
    """
    for element in self.root:
        if element.tag == 'maker':
            self.tests.append(Maker(element, test_directory))
        if element.tag == 'merge':
            self.tests.append(Merge(element, test_directory))
def Experiment():
    l = 2**9
    M = 200
    N = l * M
    i = 1
    #M = 3 * 2**24
    print "Experiment, l: " + str(l) + ' M: ' + str(M)
    origin = Sfile('origin')
    inputs = Sfile('inputs')
    outputs = Sfile('outputs')
    origin.delete()
    inputs.delete()
    outputs.delete()
    origin.reopen()
    inputs.reopen()
    outputs.reopen()
    writeRandomNumbers(origin, N)
    origin.reopen()
    init_time = time()
    Ms = Merge(l, M, origin, inputs, outputs)
    Ms.mergesort_k(i)
    end_time = time()
    print 'time: ' + str(end_time - init_time) + ' [sg]'
    #outputs.close()
    #outputs.reopen()
    #outputs.seek(0, 0)
    #values = outputs.reads(N)
    #sortTest(values)
    #print str(len(values)) + ' result: ' + str(values)
    origin.close()
    inputs.close()
    outputs.close()
def launch(baseType='mecanum', debug=True, record=True, openViewer=False, openROS=False):
    global __isLaunched
    if not __isLaunched:
        global base, merge, dash, feed, flow, remote, spy
        base = Mecanum() if baseType == 'mecanum' else Omni()
        merge = Merge()
        dash = Dash(base, merge)
        feed = Feed(dash)
        flow = Flow(dash, debug=debug)
        remote = Remote(dash, flow)
        spy = Spy(dash)
        __isLaunched = True
        base.enable()
        if record:
            spy.begin()
        if openViewer:
            from viewer import Viewer
            global viewer
            viewer = Viewer(dash)
        if openROS:
            from ros import Ros
            global ros
            ros = Ros(dash)
def task_groupLanguageAndProducer_country():
    m = Merge()
    m.groupLanguageAndProducer_country(query={})
def task_rm_unknown():
    m = Merge()
    m.rm_unknown(query={})
def task_clean_category():
    m = Merge()
    m.clean_category(query={})
# from scrapy.cmdline import execute
# from os import system
import subprocess

from merge import Merge

name = '22tu'

if __name__ == '__main__':
    # cmd = f'scrapy crawl {name}'
    # execute(cmd.split(' '))
    for episode in range(30, 31):
        SOAP_ID = '28230'
        EPISODE = episode
        SOAP_NAME = 'doutinghao'
        print(f"Now downloading: {SOAP_NAME}\nEpisode: {EPISODE}")
        child_crawl = subprocess.call(f"scrapy crawl {name} \
            -a SOAP_ID={SOAP_ID} -a EPISODE={EPISODE} -a SOAP_NAME={SOAP_NAME}")
        # child_crawl.wait()
        print("Merging the ts files")
        merge = Merge(SOAP_NAME=SOAP_NAME, EPISODE=EPISODE)
        merge.main()
        print(f"{SOAP_NAME}_{EPISODE} finished downloading")
def task_categories():
    m = Merge()
    m.groupCategories(query={})
def test_numbers(self):
    lists = ((1, 2), (2, 3, 5), (3, 4))
    lists = [iter(l) for l in lists]
    merge = Merge(lists)
    generator = merge.generator()
    assert list(generator) == [1, 2, 2, 3, 3, 4, 5]
output_filename = 'output.docx'
output_filepath = os.path.join(os.getcwd(), output_filename)
tmp_dir1 = os.path.join(os.getcwd(), 'tmp\\split')
tmp_dir2 = os.path.join(os.getcwd(), 'tmp\\split-processed')

from misc import mkdir

if __name__ == "__main__":
    #
    extract = Extract(input_filepath)
    extract.process()  # output_filepath=extract_filepath)
    db['project_info'].set_db(extract.extract_project_infos())
    #
    mkdir(tmp_dir1)
    split = Split(input_filepath=extract_filepath)
    sections = split.process()
    #
    db['finance'].filtering(need_years=3)
    # name keys: 总经理姓名 = general manager, 联系人姓名 = contact, 项目经理人姓名 = project manager
    db['human'].select_people(name_list=['总经理姓名', '联系人姓名', '项目经理人姓名'])
    # '水利' = water conservancy projects
    db['projects_done'].filtering(project_types=['水利'], need_years=3)
    db['projects_being'].filtering(project_types=['水利'])
    #
    mkdir(tmp_dir2)
    for section in sections:
        fillin = FillIn(os.path.join(tmp_dir1, section + '.docx'))
        fillin.process(os.path.join(tmp_dir2, section + '.docx'))
    #
    merge = Merge(tmpdir=tmp_dir2, section_names=sections)
    merge.process(output_filepath)
class Extract:
    def __init__(self, jieba=False):
        import re
        self.de = re.compile(u"[\u4e00-\u9fa5]")
        self.jieba = jieba
        self.relation = {
            u'fuqin': ('PERSON', 'PERSON'),
            u'erzi': ('PERSON', 'PERSON'),
            u'nver': ('PERSON', 'PERSON'),
            u'nvyou': ('PERSON', 'PERSON'),
            u'nanyou': ('PERSON', 'PERSON'),
            u'muqin': ('PERSON', 'PERSON'),
            u'emma': ('PERSON', 'PERSON'),
            u'zhangfu': ('PERSON', 'PERSON'),
            u'qizi': ('PERSON', 'PERSON'),
            u'\u5973\u53cb': ('PERSON', 'PERSON'),
            u'\u5973\u513f': ('PERSON', 'PERSON'),
            u'\u59bb\u5b50': ('PERSON', 'PERSON'),
            u'\u4e08\u592b': ('PERSON', 'PERSON'),
            u'\u524d\u592b': ('PERSON', 'PERSON'),
            u'\u7236\u4eb2': ('PERSON', 'PERSON'),
            u'\u8eab\u9ad8': ('PERSON', 'HEIGHT'),
            u'\u751f\u65e5': ('PERSON', 'DATE'),
            u'\u64ad\u51fa\u65f6\u95f4': ('FILM', 'TIME'),
            u'\u4e3b\u9898\u66f2': ('FILM', 'MUSIC')
        }
        # The original literal repeated the same key set four times (values
        # 0-63, 64-127, 128-191, 192-255); in a dict literal only the last
        # assignment for a key survives, so this is the effective mapping.
        self.pos_tagger = {
            'a': 192, 'ad': 193, 'ag': 194, 'an': 195, 'b': 196, 'bg': 197,
            'c': 198, 'd': 199, 'df': 200, 'dg': 201, 'e': 202, 'en': 203,
            'f': 204, 'g': 205, 'h': 206, 'i': 207, 'in': 208, 'j': 209,
            'jn': 210, 'k': 211, 'l': 212, 'ln': 213, 'm': 214, 'mg': 215,
            'mq': 216, 'n': 217, 'ng': 218, 'nr': 219, 'nrfg': 220, 'nrt': 221,
            'ns': 222, 'nt': 223, 'nz': 224, 'o': 225, 'p': 226, 'q': 227,
            'qe': 228, 'qg': 229, 'r': 230, 'rg': 231, 'rr': 232, 'rz': 233,
            's': 234, 't': 235, 'tg': 236, 'u': 237, 'ud': 238, 'ug': 239,
            'uj': 240, 'ul': 241, 'uv': 242, 'uz': 243, 'v': 244, 'vd': 245,
            'vg': 246, 'vi': 247, 'vn': 248, 'vq': 249, 'w': 250, 'x': 251,
            'y': 252, 'yg': 253, 'z': 254, 'zg': 255, 'eng': 256
        }
        self.m = Merge(True, False)
        #self.m = Merge(True, True)

    # Get the NER using merge and search for the relation's NER type.
    def _process_data(self, lines, newwords, n2, tags=None):
        s = []
        p = []
        _seg = []
        _ner = []
        self.m.add_new_words(newwords)
        if n2 is not None:
            self.m.add_new_words(n2)
        for i in xrange(len(lines)):
            line = lines[i]
            (line_seg, line_pos, line_ner) = self.m.ner_using_nlpc(line)
            #(line_ner, line_pos, line_seg, line_dep) = self.m.get_line_info(line, False)
            if tags is not None:
                tag = tags[i]
                k = line_ner.count((self.relation[tag.decode('utf-8')])[1])
                if k == 0:
                    continue
                elif k == 1:
                    # NOTE: this compares the relation's object type to itself,
                    # so it is always true and single-match lines are skipped.
                    if (self.relation[tag.decode('utf-8')])[1] == (
                            self.relation[tag.decode('utf-8')])[1]:
                        continue
                    else:
                        return
            seg = line_seg.split('\t')
            pos = line_pos.split('\t')
            ner = line_ner.split('\t')
            s.append(newwords[i][0].decode('utf-8'))
            p.append(tag)
            _seg.append(seg)
            _ner.append(ner)
        return (s, p, _seg, _ner)

    def statistics(self, newwords, tags, segs, ners):
        s = []
        p = []
        answer = []
        fromline = []
        for i in xrange(len(tags)):
            tag = tags[i]
            seg = segs[i]
            ner = ners[i]
            _a = []
            print ' '.join(seg).encode('utf-8')
            for id in xrange(len(seg)):
                if tags is not None:
                    if ner[id] == (self.relation[tag.decode('utf-8')])[1]:
                        ll = len(self.de.findall(seg[id]))
                        if ll == 0:
                            ll = len(seg[id])
                        if (seg[id] != newwords[i]) and (seg[id] not in _a) and (
                                ll > 1) and seg[id].isdigit() == False:
                            print newwords[i].encode('utf-8') + ',' + tag.encode(
                                'utf-8') + ',' + seg[id].encode('utf-8')
                            _a.append(seg[id])
                            answer.append(seg[id])
                            s.append(newwords[i])
                            fromline.append(''.join(seg))
                            p.append(tag)
        dict = collections.OrderedDict()
        for i in xrange(len(s)):
            s[i] = s[i].decode('utf-8')
            spo = s[i] + p[i] + answer[i]
            if spo in dict:
                dict[spo][2] += 1
            else:
                dict[spo] = []
                dict[spo].append(s[i] + '\t' + p[i])
                dict[spo].append(answer[i])
                dict[spo].append(1)
                dict[spo].append(fromline[i])
        #result = {'sp': [[answer, count, line]]}
        result = collections.OrderedDict()
        for (k, v) in dict.items():
            sp = v[0]
            if sp in result:
                if v[2] > result[sp][0][1]:
                    result[sp] = []
                    result[sp].append([v[1], v[2], v[3]])
                elif v[2] == result[sp][0][1]:
                    result[sp].append([v[1], v[2], v[3]])
            else:
                result[sp] = []
                result[sp].append([v[1], v[2], v[3]])
        list = []
        for (k, v) in result.items():
            for i in xrange(len(v)):
                value = v[i]
                if value[1] == 1:
                    list.append(k + '\t' + value[0] + '\t' + 'not sure' + '\t' + value[2])
                else:
                    list.append(k + '\t' + value[0] + '\t' + str(value[1]) + '\t' + value[2])
        return list

    def test3(self):
        lines = []
        tags = []
        newwords = []
        newwords2 = []
        for line in sys.stdin:
            try:
                line = line.split('\t')
                if len(line) < 5:
                    print 'read wrong:' + '\t'.join(line)
                    continue
                tags.append(line[1])
                newwords.append((line[0], (self.relation[line[1].decode('utf-8')])[0]))
                newwords2.append((line[2], (self.relation[line[1].decode('utf-8')])[0]))
                if line[4].strip() != '':
                    lines.append(line[4].strip())
                else:
                    print 'read wrong:' + '\t'.join(line)
            except:
                print 'read wrong:' + '\t'.join(line)
        (s, p, _seg, _ner) = self._process_data(lines, newwords, None, tags=tags)
        list = self.statistics(s, p, _seg, _ner)
        for l in list:
            print l.encode('utf-8')

    def test2(self):
        lines = []
        tags = []
        newwords = []
        newwords2 = []
        for line in sys.stdin:
            try:
                line = line.split('\t')
                if len(line) < 6:
                    print 'read wrong:' + '\t'.join(line)
                    continue
                tags.append(line[1])
                newwords.append((line[0], (self.relation[line[1].decode('utf-8')])[0]))
                newwords2.append((line[2], (self.relation[line[1].decode('utf-8')])[0]))
                if line[5].strip() != '':
                    lines.append(line[5].strip())
                else:
                    print 'read wrong:' + '\t'.join(line)
            except:
                print 'read wrong:' + '\t'.join(line)
        (s, p, _seg, _ner) = self._process_data(lines, newwords, newwords2, tags=tags)
        list = self.statistics(s, p, _seg, _ner)
        for l in list:
            print l.encode('utf-8')

    def test1(self):
        lines = []
        tags = []
        newwords = []
        newwords2 = []
        ss = ''
        pstr = ''
        anstr = ''
        check = False
        wf = open('result_fanhua_1', 'ab')
        current = 0
        all = 0
        for line in sys.stdin:
            line = line.split('\t')
            if len(line) < 5:
                print 'read wrong:' + '\t'.join(line)
                continue
            if ss != line[0] and check:
                (s, p, _seg, _ner) = self._process_data(lines, newwords, newwords2, tags=tags)
                list = self.statistics(s, p, _seg, _ner)
                wf.write(ss + '\t' + pstr + '\t' + anstr + '\n')
                all += 1
                for l in list:
                    print 'result' + l
                    wf.write(l + "\n\n")
                    if l.split('\t')[2] == anstr:
                        current += 1
                        print current
                lines = []
                tags = []
                newwords = []
                newwords2 = []
            ss = line[0]
            pstr = line[1]
            anstr = line[2]
            tags.append(line[1])
            newwords.append((line[0], (self.relation[line[1].decode('utf-8')])[0]))
            newwords2.append((line[2], (self.relation[line[1].decode('utf-8')])[0]))
            if line[4].strip() != '':
                check = True
                lines.append(line[4].strip())
            else:
                print 'read wrong:' + '\t'.join(line)
        wf.write('all' + str(all))
        wf.write('current' + str(current))
        wf.close()

    def test(self):
        lines = []
        tags = []
        newwords = []
        for line in sys.stdin:
            try:
                line = line.split(' \t')
                tags.append(line[0].split('\t')[1])
                newwords.append((line[0].split('\t')[0],
                                 (self.relation[line[0].split('\t')[1].decode('utf-8')])[0]))
                lines.append(line[1].strip())
            except:
                print line
                quit()
        (s, p, answer) = self._process_data(lines, newwords, None, tags=tags)
        list = self.statistics(s, p, answer)
        for l in list:
            print l.encode('utf-8')
def test_strings(self):
    lists = (("b", "e"), ("c", "d"), ("a", "z"))
    lists = [iter(l) for l in lists]
    merge = Merge(lists)
    generator = merge.generator()
    assert list(generator) == ['a', 'b', 'c', 'd', 'e', 'z']
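# The tests above (test_range, test_infinity, test_numbers, test_strings) pin
# down the contract of Merge: given already-sorted iterators, generator()
# lazily yields a single sorted stream. A minimal sketch consistent with that
# contract (an assumption, not the project's actual implementation):
import heapq

class Merge(object):
    def __init__(self, iterators):
        self._iterators = iterators

    def generator(self):
        # heapq.merge keeps only one head element per iterator, so it never
        # exhausts its inputs up front; that is what lets test_infinity
        # terminate even though its source generators are infinite.
        return heapq.merge(*self._iterators)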
# 100 Days of Code, day 24: mail merge.
from merge import Merge

letter = "./Input/Letters/starting_letter.txt"
names = "./Input/Names/invited_names.txt"

print("Running mail merge\n")
merge = Merge(letter, names)
count = merge.merge()
print(f"Finished. Processed {count} records.")
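# A plausible sketch of the Merge class the day-24 script above drives,
# assuming the conventional exercise setup: one invitee name per line, a
# "[name]" placeholder in the template, and merge() returning the number of
# letters written. The placeholder and output directory are assumptions, not
# taken from the original project.
import os

class Merge(object):
    def __init__(self, letter_path, names_path, out_dir="./Output/ReadyToSend"):
        self.letter_path = letter_path
        self.names_path = names_path
        self.out_dir = out_dir

    def merge(self):
        os.makedirs(self.out_dir, exist_ok=True)
        with open(self.letter_path) as f:
            template = f.read()
        count = 0
        with open(self.names_path) as f:
            for name in (line.strip() for line in f if line.strip()):
                # Write one personalized letter per invitee.
                out_path = os.path.join(self.out_dir, "letter_for_%s.txt" % name)
                with open(out_path, "w") as out:
                    out.write(template.replace("[name]", name))
                count += 1
        return count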
filter_source_db_from = FilterPlayer(server_id=SourceDBFrom["ServerID"],
                                     host=SourceDBFrom["Host"],
                                     db=SourceDBFrom["Database"],
                                     user=SourceDBFrom["User"],
                                     password=SourceDBFrom["Password"])
filter_source_db_from.run()

filter_source_db_to = FilterPlayer(server_id=SourceDBTo["ServerID"],
                                   host=SourceDBTo["Host"],
                                   db=SourceDBTo["Database"],
                                   user=SourceDBTo["User"],
                                   password=SourceDBTo["Password"])
filter_source_db_to.run()

# Handle duplicate player names.
rename_player_proc = RenamePlayer(SourceDBFrom, SourceDBTo)
rename_player_proc.run()

# Process guild data.
rename_guild_proc = RenameGuild(SourceDBFrom, SourceDBTo)
rename_guild_proc.run()

# Merge the data.
merge_proc = Merge(SourceDBFrom, SourceDBTo, TargetDB)
merge_proc.run()

# Update the data.
update_proc = UpdateDB(SourceDBFrom, TargetDB)
update_proc.run()

end = time.time()
print "Elapsed %d seconds in total." % (end - begin)
print "Merging [%s] and [%s] into [%s] succeeded." % (
    SourceDBFrom["Database"], SourceDBTo["Database"], TargetDB["Database"])
"X: %.3f m" % self._cursor[0]) textPainter.drawText( QPoint(ViewerWindow.MARGIN[0] / self._ratioX, ViewerWindow.MARGIN[1] / self._ratioY + 260), "Y: %.3f m" % self._cursor[1]) textPainter.drawText( QPoint(ViewerWindow.MARGIN[0] / self._ratioX, ViewerWindow.MARGIN[1] / self._ratioY + 280), "Z: %.2f deg" % (self._cursor[2] / pi * 180)) textPainter.drawText( QPoint(ViewerWindow.MARGIN[0] / self._ratioX, ViewerWindow.MARGIN[1] / self._ratioY + 320), "速度") textPainter.drawText( QPoint(ViewerWindow.MARGIN[0] / self._ratioX, ViewerWindow.MARGIN[1] / self._ratioY + 340), "X-Y: %.3f m/s" % sqrt(self._speed[0]**2 + self._speed[1]**2)) textPainter.drawText( QPoint(ViewerWindow.MARGIN[0] / self._ratioX, ViewerWindow.MARGIN[1] / self._ratioY + 360), "Z: %.2f deg/s" % (self._speed[2] / pi * 180)) if __name__ == '__main__': from omni import Omni from merge import Merge from dash import Dash omni = Omni() merge = Merge() dash = Dash(omni, merge) viewer = Viewer(dash)
def task_download_starts_avtar():
    m = Merge()
    m.download_starts_avtar(query={})
Part 4: clean the Youku data.
'''
'''
3. Category info: pulled from the cleaned content collection and stored
in the category collection (run occasionally).
'''
t = time.time()
# merge()
# fixyoukuid()
# fixletv()
# merge_star()
# merge_youkustar()
# merge_letvstar()
# merge_doubanvideo()
# merge_letvvideo()
# merge_youku_videos()
# groupCategories()  # run occasionally
# task_download_starts_avtar()
# rm_unknown()
# clean_category()
# task_groupLanguageAndProducer_country()
m = Merge()
m.fix_youku_starId()
print(time.time() - t)
def merge_star():
    m = Merge()
    m.merge_star()
#!/usr/bin/python3
from poll import Poll
from merge import Merge

pollThread = Poll("http://localhost:35673/devices")
pollThread.start()

mergeThread = Merge("http://localhost:35673")
mergeThread.start()
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="prepare_for_neuronet")
    parser.add_argument('--input', help='Input directory of bunches (annotation files)')
    args = parser.parse_args()
    input_dir = args.input
    input_dir, output_dir = Utils.init_paths_neuroner(input_dir)
    annotators = ['eugenia', 'victoria', 'isabel', 'carmen']
    variable_dict, variable_hash_dict, section_dict = Entities.get_final_annotators_entities(
        input_dir, output_dir, t_number=False)
    merged_variables, _ = Merge.merge_entities(variable_dict)
    merged_sections, _ = Merge.merge_entities(section_dict)
    merged_variables_hash = Merge.merge_hash(variable_hash_dict)
    section_variable = Merge.merge_variables_sections(merged_variables, merged_sections)
    Write.accepted_variables_neuroner(section_variable, merged_variables_hash, output_dir)
    print("Done")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="re_annotation")
    parser.add_argument('--bunch', help='Which set is going to be compared')
    args = parser.parse_args()
    bunch = args.bunch
    input_dir, output_dir = Utils.init_paths()
    annotators = Utils.annators_name(input_dir)
    variable_dict, variable_hash_dict, section_dict = Entities.get_annotators_entities(
        bunch, annotators, input_dir, t_number=False)
    merged_variables, owner_file = Merge.merge_entities(variable_dict)
    merged_variables = Entities.sorted_entities(merged_variables)
    merged_sections, _ = Merge.merge_entities(section_dict)
    merged_variables_hash = Merge.merge_hash(variable_hash_dict)
    ctakes_dir = input_dir.replace("input", "ctakes_output")
    ctakes_variables, ctakes_variables_hash, ctakes_sections = Entities.get_ctakes_entities(
        bunch, ctakes_dir, t_number=False)
    merged_variables, merged_variables_hash, merged_sections = Merge.merge_ctakes_annotators(
        merged_variables, merged_variables_hash, merged_sections,
        ctakes_variables, ctakes_variables_hash, ctakes_sections)
def act_merge(self):
    merge = Merge(tmpdir=self.processed_dir, section_names=self.sections)
    merge.process(self.filepath_out)
    opendocument(self.filepath_out)