コード例 #1
0
ファイル: paper_hash.py プロジェクト: amumu/paperlens
# Recompute paper.hashvalue for every row of the `paper` table from the
# lower-cased title, using paperlens_import.intHash.
#
# NOTE(review): the database credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.
connection = MySQLdb.connect(host="127.0.0.1", user="******", passwd="paper1ens", db="paperlens")
cursor = connection.cursor()

try:
    # Pass 1: read every (id, title) pair and compute the title hash.
    # The hashes are buffered in a dict first because the UPDATE statements
    # below are issued on this same cursor.
    paper_hash = dict()
    cursor.execute("select id,title from paper")
    n = 0
    # iter(callable, sentinel) keeps calling fetchone() until it returns None.
    for row in iter(cursor.fetchone, None):
        paper_id = int(row[0])
        title = row[1]
        paper_hash[paper_id] = paperlens_import.intHash(title.lower())
        n = n + 1
        if n % 10000 == 0:
            # Progress indicator: one line per 10000 rows read.
            print(str(n))

    # Pass 2: write the computed hashes back, one parameterized UPDATE per row.
    n = 0
    for (paper_id, hash_value) in paper_hash.items():
        cursor.execute("update paper set hashvalue=%s where id=%s", (hash_value, paper_id))
        n = n + 1
        if n % 10000 == 0:
            print(str(n))
    # Commit happens only once, after every UPDATE has been issued.
    connection.commit()
except MySQLdb.Error as e:
    # Same output as the original "print e.args[0], e.args[1]":
    # the first two error arguments separated by a single space.
    print("%s %s" % (e.args[0], e.args[1]))
finally:
    # Release the cursor and the connection on every path, not only on
    # success; previously an error left both open.
    for resource in (cursor, connection):
        try:
            resource.close()
        except MySQLdb.Error:
            # Best-effort cleanup: the connection may already be unusable
            # after the failure reported above.
            pass
コード例 #2
0
ファイル: import.py プロジェクト: amumu/paperlens
# Scan the CiteSeer dump line by line and map each CiteSeer record to an
# existing row of the `paper` table via the hash of its lower-cased title.
connection.commit()


try:
    # NOTE(review): no close() for `data` is visible in this excerpt --
    # confirm the file handle is released further down.
    data = open("../../../data/citeseer.txt")
    # Empty both CiteSeer tables up front.
    cursor.execute("truncate table paper_citeseer")
    cursor.execute("truncate table cite_citeseer")
    # citeseer_id -> paper_id, filled only for titles that match exactly one paper.
    citeseer_id_map = dict()
    # Fields of the record currently being read; they are reset at every
    # "<record>" line. Where they get assigned is below this excerpt.
    title = ''
    citeseer_id = ''
    # Number of records matched so far.
    n = 0
    for line in data:
        # Extract is defined elsewhere; it is unpacked here as a (key, value) pair.
        (key, value) = Extract(line)
        # A "<record>" line triggers processing of the title collected since
        # the previous "<record>" line, then resets the per-record state.
        # NOTE(review): a final record that is not followed by another
        # "<record>" line would need handling after the loop -- not visible here.
        if line.find("<record>") >= 0:
            # Only titles longer than 20 characters are looked up.
            if len(title) > 20:
                hashvalue = paperlens_import.intHash(title.lower())
                # NOTE(review): (hashvalue) is a plain value, not a 1-tuple;
                # the DB-API form is (hashvalue,) -- confirm the driver in use
                # accepts a scalar parameter.
                # NOTE(review): selecting `id` next to count(*) without GROUP BY
                # depends on the server's SQL mode -- TODO confirm.
                cursor.execute("select count(*),id from paper where hashvalue=%s",(hashvalue))
                row = cursor.fetchone()
                # Accept the match only when exactly one paper has this hash.
                if int(row[0]) == 1:
                    paper_id = int(row[1])
                    # The first mapping seen for a given citeseer_id wins.
                    if citeseer_id not in citeseer_id_map:
                        citeseer_id_map[citeseer_id] = paper_id
                    # Disabled: direct write of the mapping to paper_citeseer.
                    #cursor.execute("replace into paper_citeseer (paper_id, citeseer_key) values (%s, %s)",(paper_id, citeseer_id))

                    # Progress output every 10000 matches; the check runs before
                    # the increment, so the very first match is printed too.
                    if n % 10000 == 0:
                        print n, title, citeseer_id
                    n = n + 1

            # Start collecting the next record from a clean state.
            title = ''
            citeseer_id = ''
        if key == "<dc:title>":
コード例 #3
0
ファイル: import_id.py プロジェクト: amumu/paperlens
                             passwd="paper1ens",
                             db="paperlens")
# Scan the CiteSeer dump line by line and store, for every record whose title
# hash matches exactly one row of `paper`, the (paper_id, citeseer_key) pair
# in the paper_citeseer table.
cursor = connection.cursor()
connection.commit()

try:
    # NOTE(review): no close() for `data` is visible in this excerpt --
    # confirm the file handle is released further down.
    data = open("../../../data/citeseer.txt")
    # Empty the mapping table before it is repopulated below.
    cursor.execute("truncate table paper_citeseer")
    # Fields of the record currently being read; title is reset at every
    # "<record>" line. Where they get assigned is below this excerpt.
    title = ''
    citeseer_id = ''
    # Number of records matched so far.
    n = 0
    for line in data:
        # Extract is defined elsewhere; it is unpacked here as a (key, value) pair.
        (key, value) = Extract(line)
        # A "<record>" line triggers processing of the title collected since
        # the previous "<record>" line.
        if line.find("<record>") >= 0:
            # Only titles longer than 20 characters are looked up.
            if len(title) > 20:
                hashvalue = paperlens_import.intHash(title.lower())
                # NOTE(review): (hashvalue) is a plain value, not a 1-tuple;
                # the DB-API form is (hashvalue,) -- confirm the driver in use
                # accepts a scalar parameter.
                # NOTE(review): selecting `id` next to count(*) without GROUP BY
                # depends on the server's SQL mode -- TODO confirm.
                cursor.execute(
                    "select count(*),id from paper where hashvalue=%s",
                    (hashvalue))
                row = cursor.fetchone()
                # Accept the match only when exactly one paper has this hash.
                if int(row[0]) == 1:
                    paper_id = int(row[1])
                    # Store the mapping with a parameterized REPLACE statement.
                    cursor.execute(
                        "replace into paper_citeseer (paper_id, citeseer_key) values (%s, %s)",
                        (paper_id, citeseer_id))

                    # Progress output every 10000 matches; the check runs before
                    # the increment, so the very first match is printed too.
                    if n % 10000 == 0:
                        print n, title, citeseer_id
                    n = n + 1

            # Start collecting the next record from a clean state.
            title = ''