Exemple #1
0
def main(train=False, analyze=False):
    """Run training (if train=True), predict scores, then run analysis (if analyze=True).

    The ``works`` table is loaded from PostgreSQL, genre / topic / score
    predictions are added to it, and the result is written to the
    ``works_flask`` table (replacing any existing table of that name).

    :param train: when true, the ``make_*`` / ``detrend_dates`` training
        routines run before the corresponding ``predict_*`` step.
    :param analyze: when true, the exploration and plotting routines run
        after the predictions have been stored.
    """
    engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(username, password, host, port, dbname))
    df = pd.read_sql_query("SELECT * FROM works", engine)

    # The confusion-matrix counts only exist after make_score() has run, i.e.
    # after a training pass. Track that explicitly so analyze=True without
    # train=True does not die on an unbound name.
    confusion_counts = None

    if train:
        make_genres(df)
    df = predict_genres(df)
    if train:
        make_topics(df)
    df = predict_topics(df)
    if train:
        detrend_dates(df)
        # make_score() returns the four confusion counts followed by the
        # train/test targets and predictions; only the counts are used here.
        tp, tn, fp, fn, *_unused = make_score(df)
        confusion_counts = (tp, tn, fp, fn)
    df = predict_score(df)

    df.to_sql('works_flask', engine, if_exists='replace')

    if analyze:
        initial_exploration(df)
        plot_nominees()
        analyze_genres(df)
        if confusion_counts is not None:
            confusion_matrix(*confusion_counts)
        else:
            print("Skipping confusion matrix: it requires a training run (train=True).")
        analyze_topics()
 def constraint3(x):
     """Return the signed gap between ``old_score`` and the score predicted with alphas ``x``.

     Writes ``x`` into the enclosing ``temp_df['alpha']`` column before
     predicting. The gap is ``old_score - new`` when ``old_error`` is
     non-negative and ``new - old_score`` otherwise.
     """
     temp_df['alpha'] = x
     candidate_score = predict_score(temp_df, wod_time)
     return (old_score - candidate_score
             if old_error >= 0.0 else candidate_score - old_score)
Exemple #3
0
def process_movie(topic, title_tag, rank, url_home, audiance_link, utils_predics):
    """Scrape one movie page, score it, and publish the result to Kafka.

    :param topic: Kafka topic the serialized movie is sent to (stringified).
    :param title_tag: parsed anchor tag holding the movie title and its
        ``href`` relative to ``url_home``.
    :param rank: parsed tag whose text starts with a percentage such as ``"87%"``.
    :param url_home: base URL that the movie link is appended to.
    :param audiance_link: URL suffix of the audience-review page.
    :param utils_predics: passed through unchanged to ``predict_score``.
    :return: the scored ``Movie``, or the string ``'FAILED'`` when either
        page could not be fetched.
    """
    title = str(title_tag.next).strip()
    movie_link = title_tag.get("href")
    movie_url = url_home + movie_link
    try:
        movie_page = uReq(movie_url).read()
    except HTTPError:
        print(f"ERROR can't access to server LINK : {movie_url}  SKIPING.........")
        return 'FAILED'

    movie_data = soup(movie_page, "lxml")
    movie_propreties = get_movie_propreties(movie_data)

    def prop(key, default='unknown'):
        # Same membership-then-index lookup as before, written once.
        return movie_propreties[key] if key in movie_propreties else default

    # The score board (or its attribute) may be missing from the page; treat
    # that like an empty score instead of failing on None.
    score_board = movie_data.find("score-board", {"class": "scoreboard"})
    raw_score = score_board.get("audiencescore") if score_board is not None else None
    score = int(raw_score) * 1e-2 if raw_score else 0

    genres = prop('Genre')
    original_language = prop('Original Language')
    director = prop('Director')
    writer = prop('Writer')
    date_theatre = prop('Release Date (Theaters)')
    date_streaming = prop('Release Date (Streaming)')
    box_office = prop('Box Office (Gross USA)', 0)
    duree = prop('Runtime', 0)

    # Critic reviews: tolerate a page without the reviews container and
    # list items that carry no <p> element.
    reviews_box = movie_data.find("div", {"id": "reviews"})
    critic_items = reviews_box.find_all("li") if reviews_box is not None else []
    reviews = [elem.p.next.strip() for elem in critic_items if elem.p is not None]

    audience_url = movie_url + audiance_link
    try:
        movie_AD = uReq(audience_url).read()
    except HTTPError:
        print(f"ERROR can't access to server LINK : {audience_url}  SKIPING.........")
        return 'FAILED'

    reviews_AD = soup(movie_AD, "lxml")
    reviews_audiance = reviews_AD.find("div", {"id": "movieUserReviewsContent"})
    res = []
    if reviews_audiance is not None:
        audience_items = reviews_audiance.findAll("li", {"class": "audience-reviews__item"})
        for item in audience_items:
            text_tag = item.find("p", {
                "class": "audience-reviews__review js-review-text clamp clamp-8 js-clamp"})
            if text_tag is not None:
                res.append(text_tag.next)
    reviews = list(clean_df_column(reviews + res))

    movie = Movie(name=title, rank=int(rank.next.split('%')[0]) * 1e-2, genres=genres, score=score, reviews=reviews,
                  langage=original_language,
                  director=director, writer=writer, date_theatre=date_theatre, date_streaming=date_streaming,
                  box_office=box_office, duree=duree)
    movie = predict_score(movie, utils_predics)

    # Create the producer only once there is something to send, and always
    # flush and close it so the message is delivered and the connection is
    # released.
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    try:
        producer.send(str(topic), bytes(movie.serialize(), encoding='utf-8'))
        producer.flush()
    finally:
        producer.close()
    return movie
 def constraint1(x):
     """Compare the relative error obtained with alphas ``x`` against ``old_error``.

     Writes ``x`` into the enclosing ``temp_df['alpha']`` column, predicts
     a score from ``[temp_df, wod_time]`` and ``wod_format``, and returns
     ``old_error - new_error`` when ``old_error`` is non-negative, or
     ``new_error - old_error`` otherwise.
     """
     temp_df['alpha'] = x
     candidate_score = predict_score([temp_df, wod_time], wod_format)
     candidate_error = (candidate_score - old_score) / old_score
     return (old_error - candidate_error
             if old_error >= 0.0 else candidate_error - old_error)
Exemple #5
0
    def api_prop(m, topic, utils_predics):
        """Build a ``Movie`` from a movie-database record plus tweets, score it, and publish it.

        :param m: search-result record; supplies the ID and, when present,
            the ``'rating'`` entry.
        :param topic: Kafka topic the serialized movie is sent to (stringified).
        :param utils_predics: passed through unchanged to ``predict_score``.
        :return: the scored ``Movie``.
        """
        b = ia.get_movie(str(m.getID()))

        def first_person(key):
            # Name of the first listed person, or 'unknown' when the entry
            # is absent or empty.
            people = b[key] if key in b else []
            return people[0]['name'] if people else 'unknown'

        title = b['title']
        # rank and score are both the rating scaled by 1/10.
        rank = score = (float(m['rating']) / 10) if 'rating' in m else 0
        genres = b['genres'][0] if 'genres' in b and b['genres'] else 'unknown'
        langage = b['languages'] if 'languages' in b else 'unknown'
        director = first_person('directors')
        writer = first_person('writer')
        # Both dates are read from the same 'original air date' entry.
        air_date = b['original air date'] if 'original air date' in b else 'unknown'
        date_theatre = air_date
        date_streaming = air_date

        # NOTE(review): this keeps only the digits before the first comma of
        # the budget string (e.g. "160" from "$160,000,000") - confirm intent.
        try:
            box_office = str(
                b['box office']['Budget'].split(',')[0]).split('$')[1]
        except (KeyError, IndexError, AttributeError, TypeError):
            box_office = 'unknown'
        try:
            duree = b['runtimes'][0] or 0
        except (KeyError, IndexError, TypeError):
            duree = 0

        # Use tweets tagged with the squashed, lower-cased title as reviews.
        tweet = api.search('#' + title.strip().lower().replace(" ", ""),
                           tweet_mode="extended",
                           count=50,
                           exclude_replies=True)
        reviews = [t.full_text for t in tweet]

        reviews = list(clean_df_column(reviews))
        movie = Movie(name=title,
                      rank=rank,
                      genres=genres,
                      score=score,
                      reviews=reviews,
                      langage=langage,
                      director=director,
                      writer=writer,
                      date_theatre=date_theatre,
                      date_streaming=date_streaming,
                      box_office=box_office,
                      duree=duree)
        movie = predict_score(movie, utils_predics)

        # Create the producer only when there is something to send, and
        # always flush and close it so the message is delivered and the
        # connection is released.
        producer = KafkaProducer(bootstrap_servers='localhost:9092')
        try:
            producer.send(str(topic), bytes(movie.serialize(), encoding='utf-8'))
            producer.flush()
        finally:
            producer.close()
        return movie
Exemple #6
0
    data.append(single_row)

# Debug aid: uncomment to dump every row collected in data[].
# for _ in data:
#     print(_)

# Rows with 0 substituted for 'absent' values.
refined_data_list = refine_list(data)

# Same rows with the final score appended at the end of each list.
ielts_scores_list = ielts_scores(refined_data_list)
# for _ in ielts_scores_list:
#     print(_)

# Ask for the four section scores, in the order they are fed to the model.
print("Enter your scores please")
user_scores = [
    float(input(prompt))
    for prompt in ("Speaking score: ", "Listening score: ",
                   "Reading score: ", "Writing score: ")
]
speaking_score, listening_score, reading_score, writing_score = user_scores
# print(user_scores)

print()
print(f"User band score: {sum(user_scores)/len(user_scores)}")

predicted_score = predict_score(ielts_scores_list, user_scores)
print(f"Predicted score using DTR: {float(predicted_score)}")
def read_wods(wod_format, new_wod_df, new_wod_bool):
    """
    Replay recorded WODs, re-fit the movement alphas and persist them.

    For every AMRAP row the WOD string is parsed into (reps, movement)
    pairs, the reps actually performed are reconstructed from the score,
    the score is predicted with the current alphas, new alphas are obtained
    from ``change_alphas``, and ``Data/alpha_library.csv`` is rewritten
    from ``alpha_library.alpha_df``. WODs that contain a movement missing
    from the alpha library are reported and skipped.

    TODO:
        1.) Edit method to work with other WOD types, like RoundsForTime

    :param wod_format: enumeration that represents the WOD type
    :param new_wod_df: dataframe containing WOD information; used instead
        of ``Data/amrap_wod_memory.csv`` unless ``new_wod_bool is False``
    :param new_wod_bool: boolean that tells the function if this WOD is new
        or not (in the library or not)
    :return: None
    """
    if new_wod_bool is False:
        df_amrap = pd.read_csv('Data/amrap_wod_memory.csv',
                               names=['format', 'time_limit', 'score', 'WOD'])
    else:
        df_amrap = new_wod_df

    if wod_format == WodFormat.AMRAP:
        for k in range(df_amrap.shape[0]):
            wod_row = df_amrap.iloc[k]
            wod_time = wod_row.time_limit
            wod_score = wod_row.score
            wod_str_pre = wod_row.WOD
            # The WOD string is '|'-terminated, so the last piece is dropped.
            wod_str = wod_str_pre.split('|')[0:-1]

            reps_per_set = []
            movement_rows = []
            supported = True

            for entry in wod_str:
                entry = entry.strip()
                reps = int(re.search(r'[0-9]+', entry).group())
                reps_per_set.append(reps)
                movement = re.sub(r"\d+|\s", "", entry)
                if 'run' in movement:
                    # Runs keep their inner spacing/units: only the leading
                    # rep digits and one following whitespace are removed.
                    movement = entry.lstrip(digits)
                    movement = re.sub(r"^\s", "", movement)

                # Exact-match membership. A substring/regex test here could
                # accept a movement the exact lookup below cannot find.
                library = alpha_library.alpha_df
                is_movement = library['movement'] == movement
                if is_movement.any():
                    alpha_temp = float(library.loc[is_movement, 'alpha'].iloc[0])
                    movement_rows.append((reps, reps, movement, alpha_temp))
                else:
                    print('{} are not currently support\n'
                          'Skipping WOD:\n'
                          '{}\n'.format(movement, wod_str_pre))
                    supported = False
                    break

            if not supported:
                continue

            reps_per_movement_df = pd.DataFrame(movement_rows)
            reps_per_movement_df.columns = [
                'reps_in_set', 'reps_performed', 'movement', 'alpha'
            ]
            reps_per_round = sum(reps_per_set)
            rounds_complete = int(wod_score / reps_per_round)
            reps_last_round = wod_score % reps_per_round

            reps_per_movement_df['reps_performed'] = (
                reps_per_movement_df['reps_performed'] * rounds_complete)
            num_movements = reps_per_movement_df.shape[0]
            performed_col = reps_per_movement_df.columns.get_loc(
                'reps_performed')

            # Hand the reps of the unfinished last round out movement by
            # movement. A single positional .iloc[row, col] write is used so
            # the update lands on the frame itself rather than on a
            # temporary produced by chained indexing.
            i = 0
            while reps_last_round > 0:
                position = i % num_movements
                set_size = reps_per_movement_df['reps_in_set'].iloc[position]
                credited = min(reps_last_round, set_size)
                reps_per_movement_df.iloc[position, performed_col] += credited
                reps_last_round -= credited
                i += 1

            movements = np.array(
                np.unique(reps_per_movement_df['movement'].values))
            movements_in_alpha_library = alpha_library.alpha_df['movement'][
                alpha_library.alpha_df['movement'].isin(movements)].values
            movements_not_in_alpha_library = set(movements) ^ set(
                movements_in_alpha_library)

            # you have all movements' alphas, and now you can train/predict
            if len(movements_not_in_alpha_library) == 0:
                predicted_score_old_alphas = predict_score(
                    [reps_per_movement_df, wod_time], wod_format)
                error_old_alphas = (predicted_score_old_alphas -
                                    wod_score) / wod_score
                new_alphas = change_alphas(reps_per_movement_df, wod_score,
                                           error_old_alphas, wod_time, 0.1,
                                           wod_format)

                temp_df = reps_per_movement_df.copy()
                temp_df['alpha'] = new_alphas
                reps_per_movement_df_new_alphas = temp_df
                predicted_score_new_alpha = predict_score(
                    [reps_per_movement_df_new_alphas, wod_time], wod_format)

                if new_wod_bool:
                    add_reps_and_alpha(reps_per_movement_df_new_alphas)

                error_new_alphas = (predicted_score_new_alpha -
                                    wod_score) / wod_score

                print('old alpha error: {}\n'
                      'old alpha predicted score: {}\n'
                      'new alpha error: {}\n'
                      'new alpha predicted score: {}\n'
                      'actual score: {}\n'.format(
                          error_old_alphas, predicted_score_old_alphas,
                          error_new_alphas, predicted_score_new_alpha,
                          wod_score))

                # adjust the alpha_df with new alpha values (a movement that
                # appears several times in the WOD gets the mean of its
                # fitted alphas)
                for movement in movements:
                    alpha_temp = float(
                        np.mean(temp_df.loc[temp_df['movement'] == movement,
                                            'alpha']))
                    alpha_library.alpha_df.loc[
                        alpha_library.alpha_df['movement'] == movement,
                        'alpha'] = alpha_temp

                # Rewrite the whole library file; 'w' truncates it first.
                with open('Data/alpha_library.csv', 'w') as library_file:
                    for _, row in alpha_library.alpha_df.iterrows():
                        library_file.write('{}, {}\n'.format(
                            row['movement'], row['alpha']))
            else:
                print(
                    'The following movements are NOT supported yet (Im workin on it!):\n'
                    '{}\n'.format(sorted(movements_not_in_alpha_library)))
    elif wod_format == WodFormat.RoundsForTime:
        print('gimme some time...gosh\n')
def change_alphas(wod_df, old_score, old_error, wod_time, change_limit,
                  wod_format):
    """
    Adjust alphas in an attempt to make future predictions more accurate.

    The alphas are fitted with SLSQP inside bounds derived from
    ``change_limit``. If the fitted alphas give a larger absolute relative
    error than ``old_error``, the fit is retried once with the limit
    reduced by 0.1; when the reduced limit is no longer positive the
    original alphas are returned unchanged.

    :param wod_df: dataframe with ``movement``, ``alpha`` and
        ``reps_performed`` columns
    :param old_score: score the objective and the relative errors are
        measured against
    :param old_error: relative error of the prediction made with the
        current alphas; its sign selects the direction of the bounds
    :param wod_time: passed to ``predict_score`` together with the dataframe
    :param change_limit: fractional change allowed on each alpha
    :param wod_format: passed to ``predict_score``
    :return: array of new alphas, one per row of ``wod_df``
    """
    num_unique_movements = len(np.unique(wod_df['movement']))
    x_init = wod_df['alpha']
    x0 = x_init
    reps = wod_df['reps_performed']

    # NOTE(review): for a non-negative error the starting point itself is
    # shifted up by change_limit, so the bounds are [x*(1+l), x*(1+l)^2];
    # for a negative error they are [x*(1-l), x]. Kept as in the original.
    if old_error >= 0.0:
        x0 = x0 + change_limit * x0
        lower_bound = x0
        upper_bound = x0 + change_limit * x0
    else:
        lower_bound = x0 - change_limit * x0
        upper_bound = x0

    bounds = list(zip(lower_bound, upper_bound))

    temp_df = wod_df.copy()

    def score_with(x):
        # Predict the score obtained when the alphas are replaced by x.
        temp_df['alpha'] = x
        return predict_score([temp_df, wod_time], wod_format)

    def objective(x):
        # |old_score - sum(reps * alpha)|, computed directly instead of
        # building the expression as a string and eval()-ing it.
        total = 0
        for i in range(len(x0)):
            j = i % num_unique_movements
            total = total + reps.iloc[j] * x[j]
            # Tie repeated rows to the alpha of their first occurrence.
            # NOTE(review): whether the optimizer sees this in-place write
            # depends on whether SciPy passes a copy of x - confirm.
            x[i] = x[j]
        return abs(old_score - total)

    def constraint1(x):
        new_error = (score_with(x) - old_score) / old_score
        if old_error >= 0.0:
            return old_error - new_error
        else:
            return new_error - old_error

    def constraint2(x):
        new_error = (score_with(x) - old_score) / old_score
        if old_error >= 0.0:
            return new_error
        else:
            return -new_error

    def constraint3(x):
        new_score = score_with(x)
        if old_error >= 0.0:
            return old_score - new_score
        else:
            return new_score - old_score

    cons1 = {'type': 'ineq', 'fun': constraint1}
    # cons2 = {'type': 'ineq', 'fun': constraint2}
    # cons3 = {'type': 'ineq', 'fun': constraint3}
    cons = [cons1]

    z = minimize(objective,
                 x0,
                 method='SLSQP',
                 bounds=bounds,
                 constraints=cons)
    new_alphas = z['x']

    new_score = score_with(new_alphas)
    new_error = (new_score - old_score) / old_score

    # One retry is enough: the inputs of the retry never change, so looping
    # on this condition could only repeat the same result forever.
    if abs(new_error) > abs(old_error):
        reduced_limit = change_limit - 0.1
        if reduced_limit > 0:
            new_alphas = change_alphas(wod_df, old_score, old_error, wod_time,
                                       reduced_limit, wod_format)
        else:
            # A non-positive limit leaves no room to move (and a negative
            # one would invert the bounds), so keep the original alphas.
            new_alphas = np.asarray(x_init, dtype=float)
        alpha_change = x_init - new_alphas
        print('old alphas: {} \n'
              'new alphas: {} \n'
              'change in alpha: {}'.format(
                  list(x_init[0:num_unique_movements]),
                  new_alphas[0:num_unique_movements], list(alpha_change)))
    return new_alphas
Exemple #9
0
def new_wod_prediction():
    """
    Description
    -----------
    Coordinates all the proper method calls to enter a new WOD and get a prediction by calling the following functions:
        - add_wod_to_memory()
        - predict_score()
    Proper descriptions of the methods can be found where they are defined


    Important Variable Descriptions
    -------------------------------

    wod_obj will have the following components based on the type of workout entered:
        -AMRAP:
            wod_obj = List[wod_format, wod_df, wod_time]
                wod_format : enum
                    Determines which type of WOD the workout is
                wod_df : Dataframe
                    format        : score : WOD
                    RoundsForTime : 120   : 15 pushups|10 pullups|
                wod_time : string
                    Represents the amount of time allotted for the AMRAP in mm:ss format

        -RoundsForTime:
            wod_obj = List[wod_format, wod_df, rounds]
                wod_format : enum
                    Determines which type of WOD the workout is
                wod_df : Dataframe
                    format        : score : WOD
                    RoundsForTime : 120   : 15 pushups|10 pullups|
                rounds : int
                    The number of rounds that need to be completed in the WOD



    :return:
    None, but .csv files are used to store the new WOD and its results. The csv files have different formats depending
    on the format of the WOD:
        -AMRAPs -> wod format, time allotted, score, wod description
        -RoundsForTime -> wod format, rounds, score, wod description
    """
    wod_obj = add_wod_to_memory(new_wod=True)
    wod_format = wod_obj[0]
    alpha_df = pd.read_csv('Data/alpha_library.csv',
                           names=['movement', 'alpha'])

    # Both supported formats follow the same steps; they differ only in the
    # memory file the result is appended to.
    if wod_format == WodFormat.AMRAP:
        memory_path = 'Data/amrap_wod_memory.csv'
    elif wod_format == WodFormat.RoundsForTime:
        memory_path = 'Data/rft_wod_memory.csv'
    else:
        return

    wod_df = wod_obj[1]
    # Time limit for an AMRAP, number of rounds for RoundsForTime.
    wod_param = wod_obj[2]
    wod_str_pre = wod_df.iloc[0].WOD
    # The WOD string is '|'-terminated, so the last piece is dropped.
    wod_str = wod_str_pre.split('|')[0:-1]

    df_tuple = parse_wod(wod_str, alpha_df)

    prediction_df = pd.DataFrame(df_tuple)
    prediction_df.columns = ['reps_in_set', 'movement', 'alpha']
    predicted_score = predict_score([prediction_df, wod_param], wod_format)

    score_reveal = input(
        "When would you like to see your predicted score? (now/after)\n")
    if score_reveal == 'now':
        print('Predicted Score: {}\n'.format(predicted_score))
        actual_score = int(input('What was your score?\n'))
    else:
        actual_score = int(input('What was your score?\n'))
        print('Predicted Score: {}\n'.format(predicted_score))

    wod_df['score'] = [actual_score]
    read_wods(wod_format, wod_df, new_wod_bool=True)

    # The context manager guarantees the appended row is flushed and the
    # file handle is released.
    with open(memory_path, 'a') as memory_file:
        memory_file.write('{}, {}, {}, {}\n'.format(wod_df['format'][0],
                                                    wod_param, actual_score,
                                                    wod_str_pre))