def main():
    """Generate a noun dictionary from a delimited source file and print it.

    Options come from ``grabargs()``, which yields
    ``(filename, limit, output_format, wordtype, version)``.  The source
    file is read as UTF-8.  Lines starting with ``#`` are skipped; the
    others are split on the module-level ``Separator`` and kept only when
    they have at least two fields.  The writer chosen by ``output_format``
    (CSV when the format is not recognised) renders a header, at most
    ``limit`` records and a footer; each non-empty result is printed.

    If the source file cannot be opened, an error line is printed and the
    process exits.
    """
    filename, limit, output_format, wordtype, version = grabargs()

    # Keep the try body to the open() call so that only "cannot open"
    # problems are reported as such.  TypeError/ValueError cover a missing
    # (None) or malformed file name.
    try:
        source = open(filename, encoding="utf8")
    except (OSError, TypeError, ValueError):
        print(" Error :No such file or directory: %s" % filename)
        # NOTE(review): exit status 0 on failure is preserved so existing
        # callers see the same code; a non-zero status may be preferable.
        sys.exit(0)

    min_fields = 2
    noun_table = []
    # The context manager closes the file even if reading/decoding fails.
    with source:
        for raw_line in source:
            line = raw_line.strip('\n')
            if line.startswith("#"):
                continue
            fields = line.split(Separator)
            if len(fields) >= min_fields:
                noun_table.append(fields)

    # Writer modules are imported lazily so only the selected one is needed.
    if output_format == "sql":
        import sqldict
        mydict = sqldict.SqlDict(wordtype, version)
    elif output_format == "xml":
        import xmldict
        mydict = xmldict.XmlDict(wordtype, version)
    elif output_format == "stardict":
        import stardict
        mydict = stardict.StarDict(wordtype, version)
    elif output_format == "tags":
        import tagsdict
        mydict = tagsdict.TagsDict(wordtype, version)
    elif output_format == "spell":
        import spelldict
        mydict = spelldict.SpellDict(wordtype, version)
    else:
        import csvdict
        mydict = csvdict.CsvDict(wordtype, version)

    # Header
    header = mydict.add_header()
    if header:
        print(header)

    # Records (at most ``limit`` of them)
    for fields in noun_table[:limit]:
        record = mydict.add_record(fields)
        if record:
            print(record)

    # Footer
    footer = mydict.add_footer()
    if footer:
        print(footer)
def __init__(self, model=None, vectors_dir=None):
    """Set up stop words, the word-vector model and the spaCy pipeline.

    Args:
        model: Selector for the vector backend.  ``None`` (the default)
            opens the ``GoogleNews-vectors.db`` store through
            ``sqldict.SqlDict``; any other value loads the gzipped
            ``GoogleNews-vectors-negative300.bin.gz`` binary through
            ``Word2Vec.load_word2vec_format``.  The value itself is not
            otherwise used.
        vectors_dir: Directory holding the vector files.  When ``None``
            the historical hard-coded location is used, so existing
            callers are unaffected.
    """
    import os

    if vectors_dir is None:
        # Historical default; only valid on the original author's machine.
        vectors_dir = '/Users/dpmorg_mac/dpm_gdrive/insight/polibot/depedencies/'

    self.stops = set(stopwords.words('english'))

    if model is None:
        self.model = sqldict.SqlDict(
            os.path.join(vectors_dir, "GoogleNews-vectors.db"))
    else:
        pretrained = os.path.join(
            vectors_dir, 'GoogleNews-vectors-negative300.bin.gz')
        # NOTE(review): newer gensim releases expose this loader on
        # KeyedVectors -- confirm against the pinned gensim version.
        self.model = Word2Vec.load_word2vec_format(pretrained, binary=True)

    self.nlp = spacy.load('en')
def main():
    """Generate a verb dictionary from a tab-separated file and print it.

    Options come from ``grabargs()``, which yields
    ``(filename, limit, output_format, version)``.  The source file is read
    as UTF-8.  Each line is stripped of surrounding whitespace; lines
    starting with ``#`` are skipped and the others are split on tabs and
    kept only when they have at least twelve fields.  The writer chosen by
    ``output_format`` renders a header, at most ``limit`` records and a
    footer, all of which are printed.

    If the source file cannot be opened, an error line is printed and the
    process exits.
    """
    filename, limit, output_format, version = grabargs()

    # Keep the try body to the open() call so that only "cannot open"
    # problems are reported as such.  TypeError/ValueError cover a missing
    # (None) or malformed file name.
    try:
        source = open(filename, encoding="utf8")
    except (OSError, TypeError, ValueError):
        print(" Error :No such file or directory: %s" % filename)
        # NOTE(review): exit status 0 on failure is preserved so existing
        # callers see the same code; a non-zero status may be preferable.
        sys.exit(0)

    min_fields = 12
    verb_table = []
    # The context manager closes the file even if reading/decoding fails.
    with source:
        for raw_line in source:
            line = raw_line.strip()
            if line.startswith("#"):
                continue
            fields = line.split("\t")
            if len(fields) >= min_fields:
                verb_table.append(fields)

    # Writer modules are imported lazily so only the selected one is needed.
    if output_format == "sql":
        import sqldict
        mydict = sqldict.SqlDict(version)
    elif output_format == "xml":
        import xmldict
        mydict = xmldict.XmlDict(version)
    elif output_format == "stardict":
        import stardict
        mydict = stardict.StarDict(version)
    elif output_format == "tags":
        import tagsdict
        mydict = tagsdict.TagsDict(version)
    else:
        # "spell" lands here too: no spell writer was ever bound for it, and
        # leaving ``mydict`` unset crashed at the first add_header() call.
        # Falling back to CSV matches what ``factory`` does for "spell".
        # NOTE(review): a dedicated writer in ``spelldict`` may be intended
        # -- confirm and wire it up here if so.
        import csvdict
        mydict = csvdict.CsvDict(version)

    # Header, records and footer are printed unconditionally, so an empty
    # result still produces a blank line.
    print(mydict.add_header())
    for fields in verb_table[:limit]:
        print(mydict.add_record(fields))
    print(mydict.add_footer())
def factory(output_format, version):
    """Return the dictionary writer matching *output_format*.

    Args:
        output_format: One of ``"sql"``, ``"xml"``, ``"stardict"``,
            ``"tags"`` or ``"check"``.  Any other value, including
            ``"spell"``, selects the CSV writer.
        version: Passed unchanged to the writer's constructor.

    Returns:
        A newly constructed writer instance.

    Only the module of the selected writer is imported, and exactly one
    writer is constructed per call.
    """
    import importlib

    # format name -> (module name, class name)
    writers = {
        "sql": ("sqldict", "SqlDict"),
        "xml": ("xmldict", "XmlDict"),
        "stardict": ("stardict", "StarDict"),
        "tags": ("tagsdict", "TagsDict"),
        "check": ("checkdict", "checkDict"),
    }
    # NOTE(review): "spell" has no writer wired up and therefore produces
    # CSV, as it always has -- confirm whether a spelldict writer is intended.
    module_name, class_name = writers.get(output_format, ("csvdict", "CsvDict"))

    writer_class = getattr(importlib.import_module(module_name), class_name)
    return writer_class(version)
def main():
    """Generate a noun dictionary from a delimited source file and print it.

    Options come from ``grabargs()``; the attributes read are ``filename``,
    ``limit``, ``outformat``, ``wordtype`` and ``version``.  The source file
    is read as UTF-8.  Lines starting with ``#`` are skipped; the others are
    split on the module-level ``Separator`` and kept only when they have at
    least two fields.  The writer chosen by the output format (CSV when the
    format is not recognised) renders at most ``limit`` records and a
    footer; each non-empty result is printed.  No header is printed.

    If the source file cannot be opened, an error line is printed and the
    process exits.
    """
    args = grabargs()
    filename = args.filename
    limit = args.limit
    output_format = args.outformat
    wordtype = args.wordtype
    version = args.version

    # Keep the try body to the open() call so that only "cannot open"
    # problems are reported as such.  TypeError/ValueError cover a missing
    # (None) or malformed file name.
    try:
        source = open(filename, encoding='utf-8')
    except (OSError, TypeError, ValueError):
        print(" Error :No such file or directory: %s" % filename)
        # NOTE(review): exit status 0 on failure is preserved so existing
        # callers see the same code; a non-zero status may be preferable.
        sys.exit(0)

    min_fields = 2
    noun_table = []
    # The context manager closes the file even if reading/decoding fails.
    with source:
        for raw_line in source:
            line = raw_line.strip('\n')
            if line.startswith("#"):
                continue
            fields = line.split(Separator)
            if len(fields) >= min_fields:
                noun_table.append(fields)

    # Writer modules are imported lazily so only the selected one is needed.
    if output_format == "sql":
        import sqldict
        mydict = sqldict.SqlDict(wordtype, version)
    elif output_format == "xml":
        import xmldict
        mydict = xmldict.XmlDict(wordtype, version)
    elif output_format == "stardict":
        import stardict
        mydict = stardict.StarDict(wordtype, version)
    elif output_format == "tags":
        import tagsdict
        mydict = tagsdict.TagsDict(wordtype, version)
    elif output_format == "spell":
        import spelldict
        mydict = spelldict.SpellDict(wordtype, version)
    elif output_format == "check":
        import checkdict
        # Unlike the other writers, this one is built from the version only.
        mydict = checkdict.checkDict(version)
    else:
        import csvdict
        mydict = csvdict.CsvDict(wordtype, version)

    # NOTE(review): header emission is disabled (mydict.add_header() is
    # never called) -- confirm this is still wanted.

    # Records (at most ``limit`` of them)
    for fields in noun_table[:limit]:
        record = mydict.add_record(fields)
        if record:
            print(record)

    # Footer
    footer = mydict.add_footer()
    if footer:
        print(footer)
def main():
    """Generate a stop-word dictionary from a tab-separated file and print it.

    Options come from ``grabargs()``; the attributes read are ``filename``,
    ``outformat``, ``all`` and ``version``.  The output format is
    lower-cased and falls back to ``csv`` unless it is one of ``csv``,
    ``python``, ``py`` or ``sql``.  The source file is read as UTF-8.  Each
    line is stripped of surrounding whitespace; lines starting with ``#``
    are skipped and the others are split on tabs and kept only when they
    have at least nine fields.  The selected writer renders a header, at
    most 10000 records and a footer; each non-empty result is printed.

    Returns:
        ``None``.  When the source file cannot be opened an error line is
        printed and the function returns early.  When no file name was
        given, ``usage()`` is called and the process exits with status 0.
    """
    args = grabargs()
    filename = args.filename
    allforms = bool(args.all)
    version = args.version or ""
    outputformat = args.outformat

    # NOTE(review): this status line goes to stdout without the "--~#"
    # prefix used below -- confirm it is wanted in the generated output.
    print(filename, outputformat, allforms, version)

    # str.lower() replaces string.lower(), which Python 3 no longer has;
    # a missing format is treated like an unknown one.
    outputformat = (outputformat or "").lower()
    if outputformat not in ('csv', 'python', 'py', 'sql'):
        outputformat = 'csv'

    print("--~# generated format", outputformat)
    print("--~#file name ", filename)
    print("--~#output format", outputformat)

    # Writer modules are imported lazily so only the selected one is needed.
    if outputformat in ('python', 'py'):
        import pydict
        mydict = pydict.PyDict(allforms, version)
    elif outputformat == 'sql':
        import sqldict
        mydict = sqldict.SqlDict(allforms, version)
    else:
        import csvdict
        mydict = csvdict.CsvDict(allforms, version)

    if not filename:
        usage()
        sys.exit(0)

    # Keep the try body to the open() call so that only "cannot open"
    # problems are reported as such.
    try:
        source = open(filename, encoding="utf8")
    except (OSError, TypeError, ValueError):
        print(" Error :No such file or directory: %s" % filename)
        return None

    limit = 10000
    min_fields = 9
    stop_table = []
    # The context manager closes the file even if reading/decoding fails.
    with source:
        for raw_line in source:
            line = raw_line.strip()
            if line.startswith("#"):
                continue
            fields = line.split("\t")
            if len(fields) >= min_fields:
                stop_table.append(fields)

    # Header
    header = mydict.add_header()
    if header:
        print(header)

    # Records (at most ``limit`` of them)
    for fields in stop_table[:limit]:
        record = mydict.add_record(fields)
        if record:
            print(record)

    # Footer
    footer = mydict.add_footer()
    if footer:
        print(footer)