예제 #1
0
def ensureMovieTagsTable(fileName, dbConnection):
    cur = dbConnection.cursor()
    TABLE_NAME = 'MovieTags'
    if dbHelper.doesTableExist(TABLE_NAME, cur):
        return

    # Syntax 'create table if not exists' exists, but we don't know if we need to insert rows.
    with open(os.path.join(DATA_FOLDER, fileName),
              encoding='utf-8') as movieYearGenresFile:
        csvReader = csv.reader(movieYearGenresFile)
        # skip header
        headers = next(csvReader)

        cur.execute(
            "CREATE TABLE {0} (id INTEGER NOT NULL PRIMARY KEY, {1})".format(
                TABLE_NAME,
                ','.join([h + ' integer not null' for h in headers[1:]])))
        # table names can't be the target of parameter substitution
        # https://stackoverflow.com/a/3247553/746461

        to_db = list(csvReader)

        cur.executemany(
            "INSERT INTO {0} VALUES (?,{1})".format(
                TABLE_NAME, ','.join(['?'] * (len(headers) - 1))), to_db)
        dbConnection.commit()

    cur.execute('select * from {0} where id=2'.format(TABLE_NAME))
    print(cur.fetchone())
예제 #2
0
def ensureGenomeScoresTable(fileName, dbConnection):
    cur = dbConnection.cursor()
    TABLE_NAME = 'GenomeScore'
    if dbHelper.doesTableExist(TABLE_NAME, cur):
        return

    cur.execute(
        "CREATE TABLE {0} (movieId INTEGER NOT NULL, tagId INTEGER NOT NULL, relevance REAL NOT NULL, PRIMARY KEY(movieId, tagId))"
        .format(TABLE_NAME))
    with open(os.path.join(DATA_FOLDER, fileName), encoding='utf-8') as f:
        csvReader = csv.reader(f)
        next(csvReader)

        to_db = [row for row in csvReader]

        cur.executemany("INSERT INTO {0} VALUES (?,?,?);".format(TABLE_NAME),
                        to_db)
        dbConnection.commit()

    cur.execute('CREATE INDEX tagId ON {0} (tagId ASC)'.format(TABLE_NAME))

    cur.execute(
        'select * from {0} where movieId=1 and tagId=1'.format(TABLE_NAME))
    print('GenomeScore table is created.')
    print(cur.fetchone())
예제 #3
0
def ensureMoviePopularityTable(dbConnection):
    cur = dbConnection.cursor()
    TABLE_NAME = 'MoviePopularity'
    if dbHelper.doesTableExist(TABLE_NAME, cur):
        return

    cur.execute(
        "CREATE TABLE {0} (movieId INTEGER NOT NULL, popularity integer not null, PRIMARY KEY(movieId))"
        .format(TABLE_NAME))
    cur.execute('''
insert into {0}
select movieId, cast(sum(rating) as real) / count(*)
from Ratings
GROUP by movieId'''.format(TABLE_NAME))
    dbConnection.commit()
    cur.execute('select * from {0} where movieId=1653'.format(TABLE_NAME))
    print(TABLE_NAME)
    print(cur.fetchone())
예제 #4
0
def ensureTestRatingTable(fileName, dbConnection):
    cur = dbConnection.cursor()
    TABLE_NAME = 'TestRatings'
    if dbHelper.doesTableExist(TABLE_NAME, cur):
        return

    cur.execute(
        "CREATE TABLE {0} (userId INTEGER NOT NULL,movieId INTEGER NOT NULL, predict integer, PRIMARY KEY(userId,movieId))"
        .format(TABLE_NAME))
    with open(os.path.join(DATA_FOLDER, fileName), encoding='utf-8') as f:
        csvReader = csv.reader(f)
        next(csvReader)

        to_db = [row for row in csvReader]

        cur.executemany(
            "INSERT INTO {0} VALUES (?,?, null);".format(TABLE_NAME), to_db)
        dbConnection.commit()
예제 #5
0
def ensureValidationRatingsTable(fileName, dbConnection):
    cur = dbConnection.cursor()
    TABLE_NAME = 'ValidationRatings'
    if dbHelper.doesTableExist(TABLE_NAME, cur):
        return

    cur.execute(
        "CREATE TABLE {0} (userId INTEGER NOT NULL,movieId INTEGER NOT NULL,rating INTEGER NOT NULL, predict INTEGER, PRIMARY KEY(userId,movieId))"
        .format(TABLE_NAME))
    with open(os.path.join(DATA_FOLDER, fileName), encoding='utf-8') as f:
        csvReader = csv.reader(f)
        next(csvReader)

        to_db = [row for row in csvReader]

        cur.executemany(
            "INSERT INTO {0} VALUES (?,?,?,null);".format(TABLE_NAME), to_db)
        dbConnection.commit()

    cur.execute(
        'select * from {0} where userId=1 and movieId=1653'.format(TABLE_NAME))
    print(TABLE_NAME)
    print(cur.fetchone())