Beispiel #1
0
def _main():
    """Script entry point: run daily checks, then make sure today's memo exists and open it."""
    config = get_config(CONFIG_PATH)

    check_ssl_expire_date(config)
    get_twitter(config)

    memo = Memo(config)

    if not memo.is_exist_today_memo():
        # No memo for today yet: clone the most recent one as a starting point.
        print("Today's memo file does not exist.\n"
              "The latest memo will be copied.")
        try:
            memo.copy_memo_from_newest()
        except MemoError as err:
            print('Failed to copy memo file: {}'.format(err))
            exit(1)

        # Remind about attendance and surface the related resources.
        print("Did you fill out yesterday's attendance?")
        memo.open_excel()
        memo.open_website_url()
    else:
        print("Today's memo file has been created already.")

    memo.open_today_memo()
    exit(0)
Beispiel #2
0
def test_twitter():
    """Smoke-test get_twitter() against two known cities.

    For each city the first result entry must provide a non-None 'date'
    and a non-None 'text'.
    """
    class _Location(object):
        # Bare lat/lng holder shaped like the location object get_twitter expects.
        pass

    loc = _Location()
    # (case number, city, latitude, longitude)
    cases = [
        (1, "San Francisco", '37.7833', '-122.4167'),
        (2, "New York", '40.7127', '-74.0059'),
    ]
    for case_no, city, lat, lng in cases:
        loc.lat = lat
        loc.lng = lng
        response = get_twitter(city, loc)
        date = response[0][0]['date']
        text = response[0][1]['text']
        # `is not None` identity checks replace the original `!= None ... == True`;
        # the message replaces the original trailing "test_twitter test N" comments.
        assert date is not None and text is not None, \
            "test_twitter test %d" % case_no
Beispiel #3
0
def evaluate_model(model_name="als"):
    """Evaluate the model by cross-validation and print its precision@k.

    Args:
        model_name: key identifying which model get_model() should build.
    """
    # Load the dataset.
    artists, users, plays = get_twitter()

    # Build the requested model implementation.
    model = get_model(model_name)

    # Hold out part of the play data for testing.
    train, testing = train_test_split(plays)

    # Score the model on the held-out portion.
    result = precision_at_k(model=model,
                            train_user_items=train,
                            test_user_items=testing)
    print('precision@k = ', result)
Beispiel #4
0
def calculate_similar_artists(output_filename, model_name="als"):
    """ generates a list of similar artists in lastfm by utilizing the 'similar_items'
    api of the models

    Args:
        output_filename: path of the TSV file written as (artist, other_artist, score)
            rows; any existing file is overwritten.
        model_name: key passed to get_model() selecting the model implementation.
    """
    # NOTE(review): despite its name, get_twitter() here is expected to return the
    # (artists, users, plays) lastfm triple — confirm against its definition.
    artists, users, plays = get_twitter()

    # create a model from the input data
    model = get_model(model_name)

    # if we're training an ALS based model, weight input for last.fm
    # by bm25
    if issubclass(model.__class__, AlternatingLeastSquares):
        # lets weight these models by bm25weight.
        logging.debug("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # only similar_items() is used below, so skip building the
        # approximate *recommend* index
        model.approximate_recommend = False

    # this is actually disturbingly expensive:
    plays = plays.tocsr()

    logging.debug("training model %s", model_name)
    start = time.time()
    model.fit(plays)
    logging.debug("trained model '%s' in %0.2fs", model_name, time.time() - start)

    # write out similar artists by popularity
    start = time.time()
    logging.debug("calculating top artists")

    # per-row nonzero counts from the CSR row pointer — i.e. users per artist
    user_count = np.ediff1d(plays.indptr)
    # artist ids ordered most-popular-first
    to_generate = sorted(np.arange(len(artists)), key=lambda x: -user_count[x])

    # write out as a TSV of artistid, otherartistid, score
    logging.debug("writing similar items")
    with tqdm.tqdm(total=len(to_generate)) as progress:
        with codecs.open(output_filename, "w", "utf8") as o:
            for artistid in to_generate:
                artist = artists[artistid]
                # 11 = the item itself plus its 10 nearest neighbours
                for other, score in model.similar_items(artistid, 11):
                    o.write("%s\t%s\t%s\n" % (artist, artists[other], score))
                progress.update(1)

    logging.debug("generated similar artists in %0.2fs",  time.time() - start)
Beispiel #5
0
def calculate_recommendations(output_filename, model_name="als"):
    """ Generates artist recommendations for each user in the dataset

    Args:
        output_filename: path of the TSV file written as (user, artist, score)
            rows; any existing file is overwritten.
        model_name: key passed to get_model() selecting the model implementation.
    """
    # train the model based off input params
    # NOTE(review): despite its name, get_twitter() here is expected to return the
    # (artists, users, plays) lastfm triple — confirm against its definition.
    artists, users, plays = get_twitter()

    # create a model from the input data
    model = get_model(model_name)

    # if we're training an ALS based model, weight input for last.fm
    # by bm25
    if issubclass(model.__class__, AlternatingLeastSquares):
        # lets weight these models by bm25weight.
        logging.debug("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # only recommend() is used below, so skip building the approximate
        # *similar_items* index (the earlier comment said "recommend index" —
        # that was a copy/paste slip; the code disables similar_items)
        model.approximate_similar_items = False

    # this is actually disturbingly expensive:
    plays = plays.tocsr()

    logging.debug("training model %s", model_name)
    start = time.time()
    model.fit(plays)
    logging.debug("trained model '%s' in %0.2fs", model_name, time.time() - start)

    # generate recommendations for each user and write out to a file
    start = time.time()
    # transpose so rows index users, as recommend() expects user_items
    user_plays = plays.T.tocsr()
    with tqdm.tqdm(total=len(users)) as progress:
        with codecs.open(output_filename, "w", "utf8") as o:
            for userid, username in enumerate(users):
                for artistid, score in model.recommend(userid, user_plays):
                    o.write("%s\t%s\t%s\n" % (username, artists[artistid], score))
                progress.update(1)
    logging.debug("generated recommendations in %0.2fs",  time.time() - start)
Beispiel #6
0
def _lookup_twitter(city, location):
    """Delegate the lookup for *city* at *location* to the twitter module."""
    result = twitter.get_twitter(city, location)
    return result