Ejemplo n.º 1
0
def write_predictdb():
    """Store the current prediction scores on one UpdateFilm row and commit.

    Takes no arguments: the values are read from the names ``scoredict`` and
    ``filmid`` in the enclosing scope, and the statement is issued through the
    ``cursor`` / ``db`` objects from that same scope.
    """
    # Order matters: it must match the order of the %s placeholders below.
    score_keys = (
        'linear_predict', 'linear_test',
        'lasso_predict', 'lasso_test',
        'knn_predict', 'knn_test',
        'poly_predict', 'poly_test',
    )
    params = tuple(scoredict[key] for key in score_keys) + (filmid, )
    update_sql = '''UPDATE UpdateFilm SET linear_predict=%s,linear_test=%s,
        lasso_predict=%s,lasso_test=%s,knn_predict=%s,knn_test=%s,poly_predict=%s,poly_test=%s 
        WHERE imdb_filmID = %s'''
    cursor.execute(update_sql, params)
    db.commit()
Ejemplo n.º 2
0
from db_helper.init_cursor import cursor
from db_helper.init_cursor import db


def get_exist_list():
    """Return the first column of every row still pending on ``cursor``.

    The caller must have executed a query on the shared ``cursor`` beforehand;
    this function only fetches and flattens the result.
    """
    return [row[0] for row in cursor.fetchall()]


# Count how many FilmDB rows mention each tag and print the resulting mapping.
cursor.execute('SELECT tags FROM FilmDB')
res = dict()
tags = get_exist_list()
for dd in tags:
    if not dd:
        # A NULL column value arrives as None, which has no .split();
        # an empty string has no tags either.
        continue
    # Each row's tags are split on '/'.
    for tag in dd.split('/'):
        if tag:  # ignore the empty pieces left by leading/trailing/double '/'
            res[tag] = res.get(tag, 0) + 1
# Single-argument print(...) produces the same output on Python 2 and Python 3,
# whereas the bare `print res` statement is a SyntaxError on Python 3.
print(res)
Ejemplo n.º 3
0
        return int(res)
    else:
        return 0


def get_weekgross(s):
    """Return the parsed number from the first 'Opening Weekend' element in *s*.

    *s* is iterated in order. An element matches when its ``h4`` attribute is
    truthy and that heading's ``string`` starts with 'Opening Weekend'; the
    element's ``get_text()`` is then passed to ``get_num`` and the result is
    returned. Returns None when no element matches.
    """
    for element in s:
        heading = element.h4
        if not heading:
            continue
        label = heading.string
        if label and label.startswith('Opening Weekend'):
            return get_num(element.get_text())
    return None


# Backfill TrailerClick.openweek_gross from each film's IMDb title page.
#
# The query picks films present in both FilmDB and TrailerClick whose country
# is USA or UK, whose gross exceeds 1000000 and whose openweek_gross is NULL.
cursor.execute(
    'SELECT FilmDB.imdb_filmID '
    'FROM FilmDB,TrailerClick '
    'WHERE FilmDB.imdb_filmID=TrailerClick.imdb_filmID '
    'AND (country=\'USA\'OR country=\'UK\') AND gross>1000000 and openweek_gross is null'
)

filmids = get_exist_list()
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(filmids)
for filmid in filmids:
    soup = page_read.page_read_nolog('http://www.imdb.com/title/' + filmid +
                                     '/')
    # Run the CSS query once and reuse the result for both the emptiness
    # check and the extraction.
    txt_blocks = soup.select('.txt-block')
    if not txt_blocks:
        continue
    weekgross = get_weekgross(txt_blocks)
    # None (no 'Opening Weekend' block found) and 0 are both falsy, so one
    # truthiness check skips every case where there is nothing to store.
    if weekgross:
        cursor.execute(
            '''UPDATE TrailerClick SET openweek_gross=%s WHERE imdb_filmID=%s''',
            (weekgross, filmid))
        # Commit per film so progress survives a failure on a later page.
        db.commit()
Ejemplo n.º 4
0

def write_predictdb():
    """Write every model's predict/test score to UpdateFilm and commit.

    Reads ``scoredict`` (score name -> value) and ``filmid`` from the
    enclosing scope and updates the UpdateFilm row whose ``imdb_filmID``
    equals ``filmid`` via the shared ``cursor`` / ``db`` objects.
    """
    statement = '''UPDATE UpdateFilm SET linear_predict=%s,linear_test=%s,
        lasso_predict=%s,lasso_test=%s,knn_predict=%s,knn_test=%s,poly_predict=%s,poly_test=%s 
        WHERE imdb_filmID = %s'''
    # Built in placeholder order: predict then test for each model, with the
    # film id last for the WHERE clause.
    values = []
    for model in ('linear', 'lasso', 'knn', 'poly'):
        values.append(scoredict[model + '_predict'])
        values.append(scoredict[model + '_test'])
    values.append(filmid)
    cursor.execute(statement, tuple(values))
    db.commit()


# Load (click_times, gross) pairs for rows where gross > 4 * click_times,
# turn them into column vectors and hold out 20% of them as a test split.
cursor.execute(
    'SELECT click_times,gross '
    'FROM FilmDB,TrailerClick '
    'WHERE FilmDB.imdb_filmID=TrailerClick.imdb_filmID '
    'AND gross>4*TrailerClick.click_times'
)
clicks = cursor.fetchall()
X_R1, y_R1 = [], []
for click_time, gross in clicks:
    X_R1.append(click_time)
    y_R1.append(gross)
# reshape(-1, 1) turns each flat list into an (n, 1) column array.
X_data, y_data = (np.array(values).reshape(-1, 1) for values in (X_R1, y_R1))
# Fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, random_state=4)
cursor.execute(
    'SELECT TrailerClick.imdb_filmID,max(click_times) FROM UpdateFilm,TrailerClick WHERE TrailerClick.imdb_filmID=UpdateFilm.imdb_filmID GROUP BY UpdateFilm.imdb_filmID'
Ejemplo n.º 5
0
from db_helper.init_cursor import db
from db_helper.init_cursor import cursor


def get_exist_list():
    """Fetch all remaining rows from ``cursor`` and return their first column.

    Relies on a query having been executed on the shared ``cursor`` just
    before the call.
    """
    rows = cursor.fetchall()
    return [row[0] for row in rows]


# Delete every TrailerClick row whose imdb_filmID has no matching row in
# UpdateFilm, then commit all deletions at once.
cursor.execute('SELECT imdb_filmID FROM UpdateFilm ')
filmids = get_exist_list()
# Build the lookup set once so each membership test below is O(1) rather
# than a scan of the whole filmids list.
known_ids = set(filmids)
cursor.execute('SELECT imdb_filmID FROM TrailerClick')
all_ids = get_exist_list()
for a_id in all_ids:
    if a_id not in known_ids:
        cursor.execute('DELETE FROM TrailerClick WHERE imdb_filmID=%s',
                       (a_id, ))
db.commit()