Esempio n. 1
0
def processFile(con, query, file, isType):
    """Read space-separated rows from `file` and hand them to mydb in batches.

    The first line of `file` is discarded. Every following line is decoded
    with the 'raw_unicode_escape' codec and split on single spaces.  When
    `isType` is true a row is (field 0, field 2); otherwise it is
    (field 0, field 1, field 2).  Rows are buffered and passed to
    mydb.executeQueryRecords(con, query, rows, False) each time the buffer
    grows past 100 entries, and once more at end of file for any remainder.

    A line with too few fields raises IndexError. Returns None.
    """
    batch = []
    count = 0

    file.readline()  # discard the first line

    line = file.readline()
    while line:
        fields = line.decode('raw_unicode_escape').split(' ')
        if isType:
            batch.append((fields[0], fields[2]))
        else:
            batch.append((fields[0], fields[1], fields[2]))

        count += 1
        # Progress counts rows read so far; the newest rows may still be
        # sitting in the buffer when this is printed.
        if count % 10000 == 0:
            print('%d records inserted!' % count)

        if len(batch) > 100:
            mydb.executeQueryRecords(con, query, batch, False)
            batch = []  # rebind: the flushed list is left untouched

        line = file.readline()

    # End of file: push whatever is left in the buffer.
    if len(batch) > 0:
        mydb.executeQueryRecords(con, query, batch, False)
Esempio n. 2
0
def processFile(con, query, file, isType):
    """Batch-load the rows of a space-delimited file through mydb.

    Skips the first line of `file`, then for each remaining line decodes it
    as 'raw_unicode_escape', splits it on ' ' and builds a tuple:
    (part 0, part 2) if `isType` is true, else (part 0, part 1, part 2).
    Tuples accumulate in a buffer that is sent with
    mydb.executeQueryRecords(con, query, buffer, False) whenever it holds
    more than 100 tuples, and one last time at end of file if non-empty.

    Raises IndexError for a line with fewer parts than required.
    Returns None.
    """
    pending = []
    seen = 0

    file.readline()  # the first line is not data
    while True:
        raw = file.readline()
        if not raw:
            # readline() returned an empty value: end of file.
            break

        parts = raw.decode('raw_unicode_escape').split(' ')
        row = (parts[0], parts[2]) if isType else (parts[0], parts[1], parts[2])
        pending.append(row)

        seen += 1
        if not seen % 10000:
            # Printed before the flush check, so it counts rows read.
            print('%d records inserted!' % seen)

        if len(pending) > 100:
            mydb.executeQueryRecords(con, query, pending, False)
            pending = []  # start a fresh list rather than clearing in place

    if pending:
        mydb.executeQueryRecords(con, query, pending, False)
Esempio n. 3
0
def insertTranslation(con, table_name, fileName, lang):
    """Insert TranslationOf triples between `lang` and English into a table.

    The file at `fileName` is read line by line after skipping its first
    line.  Each line is split on tabs; the first column, minus its first
    four characters and its last character, is split on commas into a
    triple.  (Presumably this strips an '/a/[' ... ']' wrapper -- confirm
    against the input format.)  A triple is kept only when its relation is
    '/r/TranslationOf/', its second element starts with '/c/<lang>/' and
    its third starts with '/c/en/'.

    Kept triples are buffered and written with
    mydb.executeQueryRecords(con, query, records, False) each time the
    buffer exceeds 100 entries, plus once at the end for the remainder.

    Args:
        con: connection object forwarded unchanged to mydb.
        table_name: target table; spliced directly into the SQL text, so
            it must come from a trusted source.
        fileName: path of the input file.
        lang: one of 'EN', 'RU', 'FA', 'ES'.

    Returns:
        The number of triples that passed the filter.

    Raises:
        KeyError: if `lang` is not one of the supported codes.
    """
    cn_lang = {'EN': 'en', 'RU': 'ru', 'FA': 'fa', 'ES': 'es'}
    prefix = '/c/' + cn_lang[lang] + '/'
    en_prefix = '/c/' + cn_lang['EN'] + '/'

    buffer_size = 100
    query = 'insert into __table_name__(rel,s,e) values(%s,%s,%s)'
    query = query.replace('__table_name__', table_name)

    records = []
    i = 0
    # The context manager closes the file even if parsing or the database
    # call raises; the original left the handle open.
    with open(fileName, 'r') as f:
        f.readline()  # skip the first line
        for line in f:
            # Byte-string lines (all file lines on Python 2) are decoded as
            # before; already-decoded text lines pass through unchanged.
            if isinstance(line, bytes):
                line = line.decode('utf8')
            ll = line.split('\t')
            tri = ll[0][4:-1]
            tris = tri.split(',')

            # check for language
            if not (tris[0] == '/r/TranslationOf/' and
                    tris[1].startswith(prefix) and
                    tris[2].startswith(en_prefix)):
                continue

            i += 1
            if i % 10000 == 0:
                print(i)

            records.append((tris[0], tris[1], tris[2]))

            if len(records) > buffer_size:
                mydb.executeQueryRecords(con, query, records, False)
                records = []

    # Flush the final partial batch.
    if records:
        mydb.executeQueryRecords(con, query, records, False)
    print(i)
    return i
def insertTranslation(con, table_name, fileName, lang):
    """Insert TranslationOf triples between `lang` and English into a table.

    The file at `fileName` is read line by line after skipping its first
    line.  Each line is split on tabs; the first column, minus its first
    four characters and its last character, is split on commas into a
    triple.  (Presumably this strips an '/a/[' ... ']' wrapper -- confirm
    against the input format.)  A triple is kept only when its relation is
    '/r/TranslationOf/', its second element starts with '/c/<lang>/' and
    its third starts with '/c/en/'.

    Kept triples are buffered and written with
    mydb.executeQueryRecords(con, query, records, False) each time the
    buffer exceeds 100 entries, plus once at the end for the remainder.

    Args:
        con: connection object forwarded unchanged to mydb.
        table_name: target table; spliced directly into the SQL text, so
            it must come from a trusted source.
        fileName: path of the input file.
        lang: one of 'EN', 'RU', 'FA', 'ES'.

    Returns:
        The number of triples that passed the filter.

    Raises:
        KeyError: if `lang` is not one of the supported codes.
    """
    cn_lang = {'EN': 'en', 'RU': 'ru', 'FA': 'fa', 'ES': 'es'}
    prefix = '/c/' + cn_lang[lang] + '/'
    en_prefix = '/c/' + cn_lang['EN'] + '/'

    buffer_size = 100
    query = 'insert into __table_name__(rel,s,e) values(%s,%s,%s)'
    query = query.replace('__table_name__', table_name)

    records = []
    i = 0
    # The context manager closes the file even if parsing or the database
    # call raises; the original left the handle open.
    with open(fileName, 'r') as f:
        f.readline()  # skip the first line
        for line in f:
            # Byte-string lines (all file lines on Python 2) are decoded as
            # before; already-decoded text lines pass through unchanged.
            if isinstance(line, bytes):
                line = line.decode('utf8')
            ll = line.split('\t')
            tri = ll[0][4:-1]
            tris = tri.split(',')

            # check for language
            if not (tris[0] == '/r/TranslationOf/' and
                    tris[1].startswith(prefix) and
                    tris[2].startswith(en_prefix)):
                continue

            i += 1
            if i % 10000 == 0:
                print(i)

            records.append((tris[0], tris[1], tris[2]))

            if len(records) > buffer_size:
                mydb.executeQueryRecords(con, query, records, False)
                records = []

    # Flush the final partial batch.
    if records:
        mydb.executeQueryRecords(con, query, records, False)
    print(i)
    return i
Esempio n. 5
0
                native_property = '__total__'
            else:
                native_property = findLabel(originTree, property[1:-1], slang,
                                            1)
                if native_property is None:
                    native_property = findLabel(originTree, property[1:-1],
                                                slang, 2)
                if native_property is None:
                    native_property = 'None'

            hit = properties[property]
            record = (typeName, property, native_type, native_property, hit,
                      total)
            records.append(record)
            if len(records) > 100:
                mydb.executeQueryRecords(con, insertQuery, records, False)
                records = []
        # break
    if len(records) > 0:
        mydb.executeQueryRecords(con, insertQuery, records, False)
        records = []
else:  # lang == 'FA'
    typeFile = open('types_fa.txt', 'r')
    while True:
        line = typeFile.readline()
        if not line:
            break
        typeName = line.strip()
        properties = calculate.calculate(typeName, lang)
        total = properties['__total__']
        for property in properties:
Esempio n. 6
0
        for property in properties:
            native_property = ''
            if property == '__total__':
                native_property = '__total__'
            else:
                native_property = findLabel(originTree,property[1:-1],slang,1)
                if native_property == None:
                    native_property = findLabel(originTree,property[1:-1],slang,2)
                if native_property == None:
                    native_property = 'None'

            hit = properties[property]
            record = (typeName,property,native_type,native_property,hit,total)
            records.append(record)
            if len(records)>100:
                mydb.executeQueryRecords(con,insertQuery,records,False)
                records=[]
        #break
    if len(records)>0:
        mydb.executeQueryRecords(con,insertQuery,records,False)
        records=[]
else: # lang == 'FA'
    typeFile = open('types_fa.txt','r')
    while True:
        line = typeFile.readline()
        if not line:
            break
        typeName = line.strip()
        properties = calculate.calculate(typeName,lang)
        total = properties['__total__']
        for property in properties: