Exemple #1
0
def delete_package(pkg_id):
    """Remove the row of one package from the ``document`` table.

    The id is converted with ``str`` and appended to the statement text,
    which is then run through ``util.executeSQL`` on ``conn``.

    @param pkg_id: Id of the package in database
    """
    id_text = str(pkg_id)
    statement = 'delete from document where id = ' + id_text
    util.executeSQL(conn, statement)
Exemple #2
0
def delete_package(pkg_id):
    """Remove the row of one package from the ``document`` table.

    The id is converted with ``str`` and appended to the statement text,
    which is then run through ``util.executeSQL`` on ``conn``.

    @param pkg_id: Id of the package in database
    """
    id_text = str(pkg_id)
    statement = 'delete from document where id = ' + id_text
    util.executeSQL(conn, statement)
Exemple #3
0
def delete_keywords(pkg_id):
    """Remove every ``keywords`` row whose ``doc_id`` matches the package.

    The id is converted with ``str`` and appended to the statement text,
    which is then run through ``util.executeSQL`` on ``conn``.

    @param pkg_id: Id of the package in database
    """
    id_text = str(pkg_id)
    statement = 'delete from keywords where doc_id = ' + id_text
    util.executeSQL(conn, statement)
Exemple #4
0
def delete_keywords(pkg_id):
    """Remove every ``keywords`` row whose ``doc_id`` matches the package.

    The id is converted with ``str`` and appended to the statement text,
    which is then run through ``util.executeSQL`` on ``conn``.

    @param pkg_id: Id of the package in database
    """
    id_text = str(pkg_id)
    statement = 'delete from keywords where doc_id = ' + id_text
    util.executeSQL(conn, statement)
Exemple #5
0
def populate_clean_keywords():
    """Fill ``clean_keywords`` with one row per distinct keyword name.

    Each inserted row carries the keyword name and the number of
    ``keywords`` rows sharing that name. Any exception raised while
    printing or executing is caught and printed instead of propagated.
    """
    try:
        print('populate clean keywords table')
        statement = (
            'insert into clean_keywords(word, count) '
            'SELECT name,COUNT(*) as count FROM keywords GROUP BY name'
        )
        print(statement)
        util.executeSQL(conn, statement)
    except Exception as err:
        print(err)
Exemple #6
0
def populate_clean_keywords():
    """Fill ``clean_keywords`` with one row per distinct keyword name.

    Each inserted row carries the keyword name and the number of
    ``keywords`` rows sharing that name. Any exception raised while
    printing or executing is caught and printed instead of propagated.
    """
    try:
        print('populate clean keywords table')
        statement = (
            'insert into clean_keywords(word, count) '
            'SELECT name,COUNT(*) as count FROM keywords GROUP BY name'
        )
        print(statement)
        util.executeSQL(conn, statement)
    except Exception as err:
        print(err)
Exemple #7
0
def insert_package(pkg_name, pkg_id):
    """Add one package row to the ``document`` table.

    The statement is printed before it is executed. A failure while
    executing is caught and printed; building the statement happens
    outside that guard.

    @param pkg_name: name of the package
    @param pkg_id:  Id of the package in database
    """
    # NOTE(review): pkg_name is spliced between double quotes unescaped, so
    # a name containing '"' changes the statement -- confirm whether
    # util.executeSQL offers bound parameters.
    fragments = [
        "INSERT INTO document VALUES (",
        str(pkg_id),
        ',"',
        pkg_name,
        '", null, 0)',
    ]
    statement = ''.join(fragments)
    print(statement)
    try:
        util.executeSQL(conn, statement)
    except Exception as err:
        print(err)
Exemple #8
0
def insert_package(pkg_name, pkg_id):
    """Add one package row to the ``document`` table.

    The statement is printed before it is executed. A failure while
    executing is caught and printed; building the statement happens
    outside that guard.

    @param pkg_name: name of the package
    @param pkg_id:  Id of the package in database
    """
    # NOTE(review): pkg_name is spliced between double quotes unescaped, so
    # a name containing '"' changes the statement -- confirm whether
    # util.executeSQL offers bound parameters.
    fragments = [
        "INSERT INTO document VALUES (",
        str(pkg_id),
        ',"',
        pkg_name,
        '", null, 0)',
    ]
    statement = ''.join(fragments)
    print(statement)
    try:
        util.executeSQL(conn, statement)
    except Exception as err:
        print(err)
Exemple #9
0
def insert_keyword(kwd, pkg_id):
    """Add one keyword row to the ``keywords`` table.

    The row id comes from the global counter ``kwd_index``, which is
    advanced by one only after the statement has executed. Any exception
    raised along the way is caught and printed, leaving the counter as is.

    @param kwd: keyword
    @param pkg_id: package id
    """
    global kwd_index
    try:
        # NOTE(review): kwd is spliced between double quotes unescaped --
        # confirm whether util.executeSQL offers bound parameters.
        fragments = [
            'INSERT INTO keywords VALUES (',
            str(kwd_index),
            ',"',
            kwd,
            '",',
            str(pkg_id),
            ',0)',
        ]
        statement = ''.join(fragments)
        print(statement)
        util.executeSQL(conn, statement)
        kwd_index = kwd_index + 1
    except Exception as err:
        print(err)
Exemple #10
0
def insert_keyword(kwd, pkg_id):
    """Add one keyword row to the ``keywords`` table.

    The row id comes from the global counter ``kwd_index``, which is
    advanced by one only after the statement has executed. Any exception
    raised along the way is caught and printed, leaving the counter as is.

    @param kwd: keyword
    @param pkg_id: package id
    """
    global kwd_index
    try:
        # NOTE(review): kwd is spliced between double quotes unescaped --
        # confirm whether util.executeSQL offers bound parameters.
        fragments = [
            'INSERT INTO keywords VALUES (',
            str(kwd_index),
            ',"',
            kwd,
            '",',
            str(pkg_id),
            ',0)',
        ]
        statement = ''.join(fragments)
        print(statement)
        util.executeSQL(conn, statement)
        kwd_index = kwd_index + 1
    except Exception as err:
        print(err)
Exemple #11
0
def delete_table_data():
    """Empty the ``document``, ``clean_keywords`` and ``keywords`` tables.

    The three delete statements run in that order. The first exception
    stops the sequence and is printed instead of propagated.
    """
    statements = (
        'delete from document',
        'delete from clean_keywords',
        'delete from keywords',
    )
    try:
        print('delete existing data')
        for statement in statements:
            util.executeSQL(conn, statement)
    except Exception as err:
        print(err)
Exemple #12
0
def delete_table_data():
    """Empty the ``document``, ``clean_keywords`` and ``keywords`` tables.

    The three delete statements run in that order. The first exception
    stops the sequence and is printed instead of propagated.
    """
    statements = (
        'delete from document',
        'delete from clean_keywords',
        'delete from keywords',
    )
    try:
        print('delete existing data')
        for statement in statements:
            util.executeSQL(conn, statement)
    except Exception as err:
        print(err)
Exemple #13
0
def has_enough_keywords(pkg_id):
    """Tell whether a package has at least two keywords, pruning it if not.

    A package with a single keyword would be treated by the classifier as
    an origin, making its result meaningless. When fewer than two keyword
    rows exist for the package, the package and its keywords are deleted
    through ``delete_package`` and ``delete_keywords``.

    @param pkg_id: Id of the package in database
    @return: True when two or more keywords exist, False otherwise
    """
    query = 'select count(*) from keywords where doc_id = ' + str(pkg_id)
    keyword_total = 0
    # The count is taken from the last row the query yields.
    for record in util.executeSQL(conn, query):
        keyword_total = int(record[0])
    if keyword_total >= 2:
        return True
    delete_package(pkg_id)
    delete_keywords(pkg_id)
    return False
Exemple #14
0
def has_enough_keywords(pkg_id):
    """Tell whether a package has at least two keywords, pruning it if not.

    A package with a single keyword would be treated by the classifier as
    an origin, making its result meaningless. When fewer than two keyword
    rows exist for the package, the package and its keywords are deleted
    through ``delete_package`` and ``delete_keywords``.

    @param pkg_id: Id of the package in database
    @return: True when two or more keywords exist, False otherwise
    """
    query = 'select count(*) from keywords where doc_id = ' + str(pkg_id)
    keyword_total = 0
    # The count is taken from the last row the query yields.
    for record in util.executeSQL(conn, query):
        keyword_total = int(record[0])
    if keyword_total >= 2:
        return True
    delete_package(pkg_id)
    delete_keywords(pkg_id)
    return False
Exemple #15
0
def calculate_TF_IDF():
    """Count keyword frequencies and store them in the database tables.

    For every word listed in ``clean_keywords`` the matching ``keywords``
    rows are read and two raw counts are derived:

      - tf: how many rows a (word, package) pair has; written to
        ``keywords.tf``
      - df: how many distinct packages contain the word; written to
        ``clean_keywords.df``

    Only these counts are stored; no inverse or logarithm is computed here.
    The function opens its own connection through ``util.getDBConnection``
    and prints every statement before executing it.
    """
    conn = util.getDBConnection()
    sql = "select word from clean_keywords"
    print(sql)
    rows = util.executeSQL(conn, sql)
    # Keyed by (word, str(pkg_id)) tuples. A joined "word:id" string cannot
    # be split back reliably when the word itself contains a colon.
    word_tf = {}
    word_df = {}
    for row in rows:
        word = row[0]
        # NOTE(review): word is spliced between single quotes unescaped --
        # confirm whether util.executeSQL offers bound parameters.
        sql1 = "select doc_id from keywords where name='" + word + "'"
        print(sql1)
        res = util.executeSQL(conn, sql1)
        for row1 in res:
            key = (word, str(row1[0]))
            if key not in word_tf:
                # First sighting of this word in this package.
                word_df[word] = word_df.get(word, 0) + 1
            word_tf[key] = word_tf.get(key, 0) + 1

    for word, df in word_df.items():
        sql = 'update clean_keywords set df=' + str(
            df) + " where word='" + word + "'"
        print(sql)
        util.executeSQL(conn, sql)

    for (word, pkg_id), tf in word_tf.items():
        sql = 'update keywords set tf=' + str(
            tf) + " where name='" + word + "' and doc_id=" + pkg_id
        print(sql)
        util.executeSQL(conn, sql)
Exemple #16
0
def calculate_TF_IDF():
    """Count keyword frequencies and store them in the database tables.

    For every word listed in ``clean_keywords`` the matching ``keywords``
    rows are read and two raw counts are derived:

      - tf: how many rows a (word, package) pair has; written to
        ``keywords.tf``
      - df: how many distinct packages contain the word; written to
        ``clean_keywords.df``

    Only these counts are stored; no inverse or logarithm is computed here.
    The function opens its own connection through ``util.getDBConnection``
    and prints every statement before executing it.
    """
    conn = util.getDBConnection()
    sql = "select word from clean_keywords"
    print(sql)
    rows = util.executeSQL(conn, sql)
    # Keyed by (word, str(pkg_id)) tuples. A joined "word:id" string cannot
    # be split back reliably when the word itself contains a colon.
    word_tf = {}
    word_df = {}
    for row in rows:
        word = row[0]
        # NOTE(review): word is spliced between single quotes unescaped --
        # confirm whether util.executeSQL offers bound parameters.
        sql1 = "select doc_id from keywords where name='" + word + "'"
        print(sql1)
        res = util.executeSQL(conn, sql1)
        for row1 in res:
            key = (word, str(row1[0]))
            if key not in word_tf:
                # First sighting of this word in this package.
                word_df[word] = word_df.get(word, 0) + 1
            word_tf[key] = word_tf.get(key, 0) + 1

    for word, df in word_df.items():
        sql = 'update clean_keywords set df=' + str(df) + " where word='" + word + "'"
        print(sql)
        util.executeSQL(conn, sql)

    for (word, pkg_id), tf in word_tf.items():
        sql = 'update keywords set tf=' + str(tf) + " where name='" + word + "' and doc_id=" + pkg_id
        print(sql)
        util.executeSQL(conn, sql)