コード例 #1
0
def main():
    """Convert a noun source file into the requested dictionary format.

    Command-line options come from ``grabargs()``, which yields the input
    file name, the maximum number of records to emit, the output format,
    the word type and the version string.

    Every line of the input that does not start with ``#`` and that splits
    into at least two ``Separator``-delimited fields is kept as a record.
    The header, the first ``limit`` records and the footer produced by the
    selected dictionary builder are printed, encoded as UTF-8; empty
    results are skipped.

    This block targets Python 2: file lines are byte strings that are
    decoded explicitly, and output is encoded before printing.
    """
    filename, limit, output_format, wordtype, version = grabargs()
    try:
        fl = open(filename)
    except IOError:
        # Only I/O failures are reported as a missing file; anything else
        # (e.g. a programming error) is allowed to surface.
        # NOTE: exit status 0 is kept for backward compatibility.
        print(" Error :No such file or directory: %s" % filename)
        sys.exit(0)

    nb_field = 2  # minimum number of fields for a line to be a record
    noun_table = []
    # The context manager closes the file even when decoding fails.
    with fl:
        for raw_line in fl:
            line = raw_line.decode("utf8").strip('\n')
            if line.startswith("#"):
                continue
            liste = line.split(Separator)
            if len(liste) >= nb_field:
                noun_table.append(liste)

    # Builder modules are imported lazily so only the selected one is loaded.
    if output_format == "sql":
        import sqldict
        mydict = sqldict.SqlDict(wordtype, version)
    elif output_format == "xml":
        import xmldict
        mydict = xmldict.XmlDict(wordtype, version)
    elif output_format == "stardict":
        import stardict
        mydict = stardict.StarDict(wordtype, version)
    elif output_format == "tags":
        import tagsdict
        mydict = tagsdict.TagsDict(wordtype, version)
    elif output_format == "spell":
        import spelldict
        mydict = spelldict.SpellDict(wordtype, version)
    else:
        # Any unrecognised format falls back to CSV.
        import csvdict
        mydict = csvdict.CsvDict(wordtype, version)

    # create header
    header = mydict.add_header()
    if header:
        print(header.encode('utf8'))
    for tuple_noun in noun_table[:limit]:
        record = mydict.add_record(tuple_noun)
        if record:
            print(record.encode('utf8'))
    # create footer
    footer = mydict.add_footer()
    if footer:
        print(footer.encode('utf8'))
コード例 #2
0
    def __init__(self, model=None):
        """Set up the stop-word set, the word-vector model and the spaCy pipeline.

        Args:
            model: Acts only as a switch. When it is ``None`` the vectors are
                served by ``sqldict.SqlDict`` over ``GoogleNews-vectors.db``;
                for any other value the pretrained binary word2vec file is
                loaded instead. The value itself is not otherwise used.

        Both resources are looked up under a hard-coded absolute directory.
        """
        self.stops = set(stopwords.words('english'))

        base_dir = '/Users/dpmorg_mac/dpm_gdrive/insight/polibot/depedencies/'
        if model is not None:
            vectors_file = base_dir + 'GoogleNews-vectors-negative300.bin.gz'
            self.model = Word2Vec.load_word2vec_format(vectors_file, binary=True)
        else:
            self.model = sqldict.SqlDict(base_dir + "GoogleNews-vectors.db")

        self.nlp = spacy.load('en')
コード例 #3
0
def main():
    """Convert a verb source file into the requested dictionary format.

    Command-line options come from ``grabargs()``, which yields the input
    file name, the maximum number of records to emit, the output format
    and the version string.

    Every line of the input that, once stripped, does not start with ``#``
    and that splits into at least twelve tab-separated fields is kept as a
    record. The header, the first ``limit`` records and the footer produced
    by the selected dictionary builder are printed, encoded as UTF-8.

    This block targets Python 2: file lines are byte strings that are
    decoded explicitly, and output is encoded before printing.
    """
    filename, limit, output_format, version = grabargs()
    try:
        fl = open(filename)
    except IOError:
        # Only I/O failures are reported as a missing file.
        # NOTE: exit status 0 is kept for backward compatibility.
        print(" Error :No such file or directory: %s" % filename)
        sys.exit(0)

    nb_field = 12  # minimum number of fields for a line to be a record
    verb_table = []
    # The context manager closes the file even when decoding fails.
    with fl:
        for raw_line in fl:
            line = raw_line.decode("utf8").strip()
            if line.startswith("#"):
                continue
            liste = line.split("\t")
            if len(liste) >= nb_field:
                verb_table.append(liste)

    # Builder modules are imported lazily so only the selected one is loaded.
    if output_format == "sql":
        import sqldict
        mydict = sqldict.SqlDict(version)
    elif output_format == "xml":
        import xmldict
        mydict = xmldict.XmlDict(version)
    elif output_format == "stardict":
        import stardict
        mydict = stardict.StarDict(version)
    elif output_format == "spell":
        import spelldict
        # Previously this branch imported the module but never built a
        # dictionary, so the first use of ``mydict`` below failed.
        # NOTE(review): assumes spelldict exposes SpellDict(version) like
        # the sibling builder modules -- confirm.
        mydict = spelldict.SpellDict(version)
    elif output_format == "tags":
        import tagsdict
        mydict = tagsdict.TagsDict(version)
    else:
        # Any unrecognised format falls back to CSV.
        import csvdict
        mydict = csvdict.CsvDict(version)

    # create header
    print(mydict.add_header().encode('utf8'))
    for tuple_verb in verb_table[:limit]:
        print(mydict.add_record(tuple_verb).encode('utf8'))
    # create footer
    print(mydict.add_footer().encode('utf8'))
コード例 #4
0
def factory(output_format, version):
    """Return the dictionary builder matching *output_format*.

    Args:
        output_format: One of ``"sql"``, ``"xml"``, ``"stardict"``,
            ``"spell"``, ``"tags"`` or ``"check"``. Any other value selects
            the CSV builder.
        version: Version string passed to the builder's constructor.

    Returns:
        A freshly constructed builder object for the requested format.

    Builder modules are imported lazily so only the selected one is loaded.
    """
    if output_format == "sql":
        import sqldict
        return sqldict.SqlDict(version)
    if output_format == "xml":
        import xmldict
        return xmldict.XmlDict(version)
    if output_format == "stardict":
        import stardict
        return stardict.StarDict(version)
    if output_format == "spell":
        import spelldict
        # Previously this branch imported the module but silently returned
        # the CSV builder.
        # NOTE(review): assumes spelldict exposes SpellDict(version) like
        # the sibling builder modules -- confirm.
        return spelldict.SpellDict(version)
    if output_format == "tags":
        import tagsdict
        return tagsdict.TagsDict(version)
    if output_format == "check":
        import checkdict
        return checkdict.checkDict(version)

    # Default: CSV for any unrecognised format.
    import csvdict
    return csvdict.CsvDict(version)
コード例 #5
0
ファイル: gen_noun_dict.py プロジェクト: veeshi/arramooz
def main():
    """Convert a noun source file into the requested dictionary format.

    Options are read from the namespace returned by ``grabargs()``:
    ``filename``, ``limit``, ``outformat``, ``wordtype`` and ``version``.

    Every line of the UTF-8 input that does not start with ``#`` and that
    splits into at least two ``Separator``-delimited fields is kept as a
    record. The first ``limit`` records and the footer produced by the
    selected dictionary builder are printed; empty results are skipped.
    No header is printed.
    """
    args = grabargs()
    filename = args.filename
    limit = args.limit
    output_format = args.outformat
    wordtype = args.wordtype
    version = args.version

    try:
        fl = open(filename, encoding='utf-8')
    except OSError:
        # Only I/O failures are reported as a missing file; anything else
        # (e.g. a programming error) is allowed to surface.
        # NOTE: exit status 0 is kept for backward compatibility.
        print(" Error :No such file or directory: %s" % filename)
        sys.exit(0)

    nb_field = 2  # minimum number of fields for a line to be a record
    noun_table = []
    # The context manager closes the file even when decoding fails.
    with fl:
        for raw_line in fl:
            line = raw_line.strip('\n')
            if line.startswith("#"):
                continue
            liste = line.split(Separator)
            if len(liste) >= nb_field:
                noun_table.append(liste)

    # Builder modules are imported lazily so only the selected one is loaded.
    if output_format == "sql":
        import sqldict
        mydict = sqldict.SqlDict(wordtype, version)
    elif output_format == "xml":
        import xmldict
        mydict = xmldict.XmlDict(wordtype, version)
    elif output_format == "stardict":
        import stardict
        mydict = stardict.StarDict(wordtype, version)
    elif output_format == "tags":
        import tagsdict
        mydict = tagsdict.TagsDict(wordtype, version)
    elif output_format == "spell":
        import spelldict
        mydict = spelldict.SpellDict(wordtype, version)
    elif output_format == "check":
        import checkdict
        # NOTE(review): unlike the other builders this one is given only
        # the version -- confirm that is the intended signature.
        mydict = checkdict.checkDict(version)
    else:
        # Any unrecognised format falls back to CSV.
        import csvdict
        mydict = csvdict.CsvDict(wordtype, version)

    # The header is not emitted: add_header() is not called here.
    for tuple_noun in noun_table[:limit]:
        record = mydict.add_record(tuple_noun)
        if record:
            print(record)
    # create footer
    footer = mydict.add_footer()
    if footer:
        print(footer)
コード例 #6
0
def main():

    args = grabargs()
    #~ filename, outputformat,  allforms, version=grabargs()
    filename = args.filename
    outputformat = args.outformat
    allforms = True if args.all else False
    version = args.version if args.version else ""
    print(filename, outputformat, allforms, version)
    #~ sys.exit()

    outputformat = string.lower(outputformat)
    if outputformat not in ('csv', 'python', 'py', 'sql'):
        outputformat = 'csv'
    print "--~# generated format", outputformat
    print "--~#file name ", filename
    print "--~#output format", outputformat
    #~sys.exit()
    if outputformat in ('python', 'py'):
        import pydict
        mydict = pydict.PyDict(allforms, version)
    elif outputformat == 'sql':
        import sqldict
        mydict = sqldict.SqlDict(allforms, version)
    else:
        import csvdict
        mydict = csvdict.CsvDict(allforms, version)

    if (not filename):
        usage()
        sys.exit(0)
    option = ""
    try:
        fl = open(filename)
    except:
        print " Error :No such file or directory: %s" % filename
        return None
    line = fl.readline().decode("utf8")
    text = u""
    limit = 10000
    nb_fields = 9
    stop_table = []
    while line:
        line = line.strip('\n').strip()
        if not line.startswith("#"):
            liste = line.split("\t")
            if len(liste) >= nb_fields:
                stop_table.append(liste)

        line = fl.readline().decode("utf8")
    fl.close()
    # create header
    line = mydict.add_header().encode('utf8')
    if line: print line
    for tuple_stop in stop_table[:limit]:
        line = mydict.add_record(tuple_stop).encode('utf8')
        if line:
            print line
    # create footer
    line = mydict.add_footer().encode('utf8')
    if line: print line