Example #1
    def test_recommendation(self):
        movies = utils.load_movie_list()
        my_ratings = np.zeros(len(movies))
        my_ratings[1 - 1] = 4
        my_ratings[98 - 1] = 2
        my_ratings[7 - 1] = 3
        my_ratings[12 - 1] = 5
        my_ratings[54 - 1] = 4
        my_ratings[64 - 1] = 5
        my_ratings[66 - 1] = 3
        my_ratings[69 - 1] = 5
        my_ratings[183 - 1] = 4
        my_ratings[226 - 1] = 5
        my_ratings[355 - 1] = 5

        R = utils.load_from_file('data/R.bin').astype(float)
        Y = utils.load_from_file('data/Y.bin')
        Y = np.column_stack((my_ratings, Y))
        R = np.column_stack((my_ratings != 0, R))

        model = recommender.Recommender(Y=Y, R=R, reg=10, num_features=10)
        model.learn(maxiter=1000, verbose=True, normalize=False, tol=1e-1)
        user_id = 0
        rated_ids = [i for i in range(Y.shape[0]) if R[i,user_id] == 1]
        print("USER {} HAS RATED:".format(user_id))
        for i in rated_ids:
            print("   RATED <{:.1f}> FOR '{}'".format(Y[i,user_id], movies[i]))
        recommendations = model.recommendations(user_id=user_id)
        print("RECOMMENDATIONS:")
        for (i, rating) in recommendations:
            print("   <{:.1f}> {}".format(rating, movies[i]))
Example #2
def main():
    R = utils.load_from_file('data/R.bin').astype(float)
    Y = utils.load_from_file('data/Y.bin')

    # reg_list = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]
    reg_list = [1e3, 1e4]
    num_features_list = [45, 50]

    model = Recommender(Y=Y, R=R)

    # for reg in reg_list:
    #     print("::: Trying reg = {}".format(reg))
    #     model.learn(verbose=True, reg=reg, num_features=DEFAULT_NUM_FEATURES, maxiter=DEFAULT_MAX_ITER)
    #     rmse = model.rmse()
    #     mae = model.mae()
    #     with open("log.csv", "a", newline='') as csvfile:
    #         csvwriter = csv.writer(csvfile)
    #         csvwriter.writerow([DEFAULT_NUM_FEATURES, reg, rmse, mae])

    for num_features in num_features_list:
        print("::: Trying num_feature = {}".format(num_features))
        model.learn(verbose=True,
                    reg=DEFAULT_REG,
                    num_features=num_features,
                    maxiter=DEFAULT_MAX_ITER)
        rmse = model.rmse()
        mae = model.mae()
        with open("log.csv", "a", newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow([num_features, DEFAULT_REG, rmse, mae])
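The sweep above appends rows of the form [num_features, reg, rmse, mae] to log.csv without a header row. A hypothetical follow-up step for picking the best configuration from that log (the column names are assumptions, since the file itself is headerless):

import pandas as pd

log = pd.read_csv('log.csv', names=['num_features', 'reg', 'rmse', 'mae'])
best = log.sort_values('rmse').iloc[0]
print('Best run: num_features={:.0f}, reg={:g}, rmse={:.4f}, mae={:.4f}'.format(
    best['num_features'], best['reg'], best['rmse'], best['mae']))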
Example #3
def init():
  global igrice
  global ramovi
  global procesori
  global graficke
  igrice = utils.load_from_file('data/igrice.txt')
  ramovi = utils.load_from_file('data/ramovi.txt')
  graficke = utils.load_from_file('data/graficke.txt')
  procesori = utils.load_from_file('data/procesori.txt')
Example #4
def init():
    global igrice
    global ramovi
    global procesori
    global graficke
    igrice = utils.load_from_file('data/igrice.txt')
    ramovi = utils.load_from_file('data/ramovi.txt')
    graficke = utils.load_from_file('data/graficke.txt')
    procesori = utils.load_from_file('data/procesori.txt')
Example #5
    def test_learn_and_save(self):
        # num_users, num_movies, num_features = 10, 10, 5
        R = utils.load_from_file('data/R.bin').astype(float)
        Y = utils.load_from_file('data/Y.bin')

        model = recommender.Recommender(Y=Y, R=R, reg=10, num_features=10)
        model.learn(maxiter=10, verbose=True)
        X, Theta = model.X, model.Theta

        filename = "models/recommender.bin"
        model.save(filename)
        model = recommender.Recommender.load(filename)
        np.testing.assert_almost_equal(X, model.X, decimal=2)
        np.testing.assert_almost_equal(Theta, model.Theta, decimal=2)
Example #6
def evaluate(args):
    config = Config(args)

    train, test, word_to_id, id_to_word, embeddings = utils.load_from_file()
    config.word_to_id = word_to_id
    config.id_to_word = id_to_word

    with tf.Graph().as_default():
        logger.info('Building model...')
        start = time.time()
        model = RNNModel(config, embeddings)

        logger.info('took %.2f seconds', time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            saver.restore(session, model.config.model_path)

            sentences, masks, predictions = model.output(session, train)
            originals, predictions = lookup_words(predictions, sentences,
                                                  id_to_word)
            output = zip(originals, masks, predictions)

            with open('eval_results.txt', 'w') as f:
                utils.save_results(f, output)
Example #7
def train(args):
    config = Config(args)
    train, test, id_to_word, embedding_lookup, embeddings = utils.load_from_file()

    config.id_to_word = id_to_word
    config.embedding_lookup = embedding_lookup
    utils.save(config.output_path, embedding_lookup, id_to_word)

    handler = logging.FileHandler(config.log_output)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    logging.getLogger().addHandler(handler)

    with tf.Graph().as_default():
        logger.info('Building model...')
        start = time.time()
        model = RNNModel(config, embeddings)
        logger.info('took %.2f seconds', time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train, test)

            sentences, masks, predictions = model.output(session, train)
            originals, predictions = lookup_words(predictions, sentences,
                                                  id_to_word)
            output = zip(originals, masks, predictions)

            with open('results.txt', 'w') as f:
                utils.save_results(f, output)
Example #8
def step3_fill_lengths():
    """Retrieve the lengths of the pages via APIs"""
    cuisines = load_from_file('data/cuisines_langs.dat')

    # TODO: refactor grouping together pages, do only one request for every xyz.wikipedia.org
    params = {'action': 'query', 'prop': 'info', 'format': 'json'}
    skipped = []
    for kk, vv in tqdm(cuisines.items()):
        for lang_prefix, page in tqdm(vv['languages'].items()):
            if lang_prefix != 'en':
                wiki_url = page['wiki_url']
                api_url = f'https://{wiki_url}/w/api.php'
                params['titles'] = page['title']
                with requests.Session() as session:
                    post = session.post(api_url, params)
                if not post.ok:
                    # Skip this entry so a failed request cannot leave 'res'
                    # undefined (or stale from the previous iteration)
                    print("Issue in POST call")
                    print(f"{api_url}\n{params}")
                    skipped.append((kk, lang_prefix))
                    continue
                res = post.json()
                page_data = res['query']['pages'][next(
                    iter(res['query']['pages']))]
                if 'length' in page_data:
                    vv['languages'][lang_prefix]['length'] = page_data[
                        'length']
                else:
                    skipped.append((kk, lang_prefix))
    if skipped:
        for page, lang in skipped:
            print(f"[Skip] {page} in language {lang} (unavailable length)")
    save_to_file('data/cuisines_length.dat', cuisines)
Example #9
def step2_populate_other_languages():
    """Gets URLs and titles of cuisines in multiple languages"""
    cuisines_raw = load_from_file('data/cuisines_raw.dat')

    wiki_url = 'https://en.wikipedia.org/w/api.php'
    params = {
        'action': 'query',
        'prop': 'langlinks|info',
        'llprop': 'url',
        'lllimit': 'max',
        'format': 'json'
    }
    print("Getting links for every cuisine for every language...")
    for vv in tqdm(cuisines_raw.values()):
        pageid = vv['pageid']
        params['pageids'] = pageid
        with requests.Session() as session:
            post = session.post(wiki_url, params)
            res = post.json()
            res_info = res['query']['pages'][pageid]
        if 'langlinks' in res_info:
            # Rename the comprehension variable so it does not shadow the outer 'vv'
            vv['languages'] = {
                link['lang']: {
                    'title': link['*'],
                    'wiki_url': strip_url(link['url'])
                }
                for link in res_info['langlinks']
            }
            vv['languages']['en'] = {}
            vv['languages']['en']['length'] = res_info['length']
            vv['languages']['en']['title'] = res['query']['pages'][pageid][
                'title']
    save_to_file('data/cuisines_langs.dat', cuisines_raw)
Example #10
    def load(self):
        try:
            file_path = join(INT_TEMP_SAVE_FOLDER,
                             next(reversed(sorted(listdir(INT_TEMP_SAVE_FOLDER)))))
            self._surface.deserialise(load_from_file(file_path))
            print('[OKAY] file has been loaded from:', file_path)
        except StopIteration:
            print('[FAIL] there is no file in:', INT_TEMP_SAVE_FOLDER)
Example #11
    def __init__(self):
        self.stop_words = set(stopwords.words('english'))
        self.SVC_pipeline = utils.load_from_file("lsvc_pipeline")

        self.categories = [
            'toxic', 'severe_toxic', 'obscene', 'threat', 'insult',
            'identity_hate'
        ]
Example #12
    def test_cf_cost_regularization(self):
        # print("Loading dataset...")
        R = utils.load_from_file('data/R.bin')
        Y = utils.load_from_file('data/Y.bin')

        # print("Loading pre-trained parameters...")
        _ = loadmat('data/movie_params.mat')
        X = _.get('X')
        Theta = _.get('Theta')

        # reduce dataset
        num_users = 4
        num_movies = 5
        num_features = 3
        X = X[:num_movies, :num_features]
        Theta = Theta[:num_users, :num_features]
        Y = Y[:num_movies, :num_users]
        R = R[:num_movies, :num_users]

        params = np.append(X.flatten(), Theta.flatten())
        cost = utils.cf_cost(params=params, Y=Y, R=R, num_features=num_features, reg=1.5)[0]
        self.assertAlmostEqual(31.34, cost, places=2)
Example #13
    def test_cf_cost(self):
        # print("Loading dataset...")
        R = utils.load_from_file('data/R.bin')
        Y = utils.load_from_file('data/Y.bin')

        # print("Loading pre-trained parameters...")
        _ = loadmat('data/movie_params.mat')
        X = _.get('X')
        Theta = _.get('Theta')

        # reduce dataset
        num_users = 4
        num_movies = 5
        num_features = 3
        X = X[:num_movies, :num_features]
        Theta = Theta[:num_users, :num_features]
        Y = Y[:num_movies, :num_users]
        R = R[:num_movies, :num_users]

        params = np.append(X.flatten(), Theta.flatten())
        cost = utils.cf_cost(params=params, Y=Y, R=R, num_features=num_features, reg=0)[0]
        # print("Expected cost = 22.22")
        # print("Computed cost = {:.2f}".format(cost))
        self.assertAlmostEqual(22.22, cost, places=2)
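The expected costs in the two tests above (22.22 without regularization, 31.34 with reg=1.5) are the standard collaborative-filtering cost values for this reduced dataset. The project's utils.cf_cost is not shown; the sketch below is one implementation consistent with those checks, with the (cost, grad) return layout assumed from the [0] indexing used in the tests.

import numpy as np


def cf_cost(params, Y, R, num_features, reg):
    # params packs X (num_movies x num_features) followed by
    # Theta (num_users x num_features), both flattened.
    num_movies, num_users = Y.shape
    X = params[:num_movies * num_features].reshape(num_movies, num_features)
    Theta = params[num_movies * num_features:].reshape(num_users, num_features)

    # Only entries with R == 1 (actually rated) contribute to the error
    error = (X @ Theta.T - Y) * R
    cost = (0.5 * np.sum(error ** 2)
            + 0.5 * reg * (np.sum(X ** 2) + np.sum(Theta ** 2)))

    X_grad = error @ Theta + reg * X
    Theta_grad = error.T @ X + reg * Theta
    grad = np.append(X_grad.flatten(), Theta_grad.flatten())
    return cost, grad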
Example #14
    def __init__(self, feature_json_file, timeout=10, max_workers=10):
        """
            Pipeline that manages scoring of multiple custom feature scorers
            This is the API that almost all scorers will access when training \
                a Retrieve & Rank instance with custom features

            args:
                feature_json_file (str): Path to a feature configuration file. \
                    This file defines the pipeline of custom scorers used
            raise:
                se.ScorerConfigurationException : If any of the individual scorers raise during configuration, \
                    If the file feature_json_file cannot be found or is not of the proper type
        """
        scorer_dict = utils.load_from_file(feature_json_file)
        self._document_scorers = scorer_dict.get('document', [])
        self._query_scorers = scorer_dict.get('query', [])
        self._query_document_scorers = scorer_dict.get('query_document', [])
        self._timeout = timeout
        self._interval = 0.1
        self._thread_executor = futures.ThreadPoolExecutor(max_workers)
Example #15
    def __init__(self, feature_json_file, timeout=10, max_workers=10):
        """
            Pipeline that manages scoring of multiple custom feature scorers
            This is the API that almost all scorers will access when training \
                a Retrieve & Rank instance with custom features

            args:
                feature_json_file (str): Path to a feature configuration file. \
                    This file defines the pipeline of custom scorers used
            raise:
                se.ScorerConfigurationException : If any of the individual scorers raise during configuration, \
                    If the file feature_json_file cannot be found or is not of the proper type
        """
        scorer_dict = utils.load_from_file(feature_json_file)
        self._document_scorers = scorer_dict.get('document', [])
        self._query_scorers = scorer_dict.get('query', [])
        self._query_document_scorers = scorer_dict.get('query_document', [])
        self._timeout = timeout
        self._interval = 0.1
        self._thread_executor = futures.ThreadPoolExecutor(max_workers)
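The docstring above says the feature configuration file defines the pipeline of custom scorers, and the constructor only looks up the keys 'document', 'query' and 'query_document' in whatever utils.load_from_file returns. The real per-scorer schema is not part of this listing; the shape below is only an illustrative assumption.

# Hypothetical top-level structure of the loaded feature configuration:
# scorers grouped by what they inspect. The empty lists stand in for the
# actual scorer definitions, whose schema is not shown here.
scorer_dict = {
    'document': [],        # scorers that look only at the document
    'query': [],           # scorers that look only at the query
    'query_document': [],  # scorers that look at the query/document pair
}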
Example #16
    def test_save_and_load(self):
        team_members = main.get_team_members()
        team_data = {'date': team_members}
        # save data to file
        data_file_name = '../data/test/web_data_json_test.data'

        utils.save_to_file(data_file_name, team_data)
        #
        # test loading from the file and if the name of Johanna can be found
        #
        member_data = utils.load_from_file(data_file_name)
        a_day_data = {}
        # take the first item in the dictionary; doesn't matter which one it is
        for key in member_data:
            a_day_data = member_data[key]
            break
        found_Johanna = False
        for d in a_day_data:
            if d['name'] == 'Johanna Nicoletta':
                found_Johanna = True
        self.assertEqual(found_Johanna, True, "Cannot save or load from file")
Example #17
    def train_l2sp(self, env_name="Merging-v0"):
        """
        Directly trains on env_name
        """
        bs2model = {1:B1R, 3:B3R, 5:B5R, 7:B7R}
        model_info = bs2model[int(self.bs)]
        model_dir = os.path.join(model_info[0], model_info[1], model_info[2])
        data, params = utils.load_from_file(model_dir)
        self.model = PPO2L2SP.load(model_dir, original_params=params)
        for seed in [201, 202, 203, 204, 205]:
            self.seed = seed
            self.experiment_name = f"{model_info[1]}_B{self.bs}L_L2SP{seed}"
            print("EXPT NAME: ", self.experiment_name)
            self.experiment_dir = os.path.join(self.experiment_dir1, self.experiment_name)
            self.create_eval_dir()
            env = gym.make(env_name)
            env.barrier_size = self.bs
            env = DummyVecEnv([lambda: env])
            self.model.set_env(env)
            eval_env = gym.make(env_name)
            eval_env.barrier_size = self.bs
            self.model = train(self.model, eval_env, self.timesteps, self.experiment_dir,
                               self.is_save, self.eval_save_period, self.rets_path, 0)
Example #18
def main():
    if not Path('data/cuisines_raw.dat').exists():
        execute_steps(STEPS, [i for i in range(0, len(STEPS))])
    elif not Path('data/cuisines_langs.dat').exists():
        execute_steps(STEPS, [i for i in range(1, len(STEPS))])
    elif not Path('data/cuisines_length.dat').exists():
        execute_steps(STEPS, [i for i in range(2, len(STEPS))])
    elif not Path('data/table_dataframe.dat').exists():
        execute_steps(STEPS, [i for i in range(3, len(STEPS))])
    if not Path('data/table_dataframe_full.dat').exists():
        step4_preprocess_data_frame(create_full_df=True)
    if not Path('data/wiki_languages.dat').exists():
        get_wikimedia_languages_list()

    cc1 = load_from_file('data/cuisines_raw.dat')
    cc2 = load_from_file('data/cuisines_langs.dat')
    cc3 = load_from_file('data/cuisines_length.dat')
    wl = load_from_file('data/wiki_languages.dat')
    df = load_from_file('data/table_dataframe.dat')
    df_full = load_from_file('data/table_dataframe_full.dat')

    # Plot dataframe
    step5_create_plots(df, df_full)
Example #19
    def load(filename):
        import utils
        print("Loading recommender model from '{}'".format(filename))
        return utils.load_from_file(filename)
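Example #5 above pairs this static load with a model.save(filename) call. The matching save method is not in the listing; the following is a minimal sketch, assuming the project's utils module also exposes a pickle-style writer (the helper name save_to_file is an assumption):

    def save(self, filename):
        # Hypothetical counterpart to load(); persists the whole model object
        import utils
        print("Saving recommender model to '{}'".format(filename))
        utils.save_to_file(filename, self)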
Example #20
def step4_preprocess_data_frame(create_full_df=False):
    """Create pandas DataFrames filtering out undesired data"""
    cuisines = load_from_file('data/cuisines_length.dat')

    # Set thresholds (depending on whether the regular or the full dataframe is being created)
    if create_full_df:
        threshold_min_voice_length = 0
        threshold_min_cuisines = 0
        threshold_min_languages = 0
        filename = 'data/table_dataframe_full.dat'
    else:
        threshold_min_voice_length = defs.THRESHOLD_MIN_VOICE_LENGTH
        threshold_min_cuisines = defs.THRESHOLD_MIN_CUISINES
        threshold_min_languages = defs.THRESHOLD_MIN_LANGUAGES
        filename = 'data/table_dataframe.dat'

    # Set pandas view options
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

    # Find languages to consider
    languages = set()
    for kk, vv in cuisines.items():
        for lang in [*vv['languages'].keys()]:
            languages.add(lang)
    languages = [*languages]
    languages.sort()
    languages.insert(0, 'cuisine')

    # Create full table
    df_fulltable = pd.DataFrame(columns=languages)
    for kk, vv in tqdm(cuisines.items()):
        entry = {}
        # Add cuisine name (removing "cuisine") once per entry
        entry['Cuisine'] = kk.replace(" cuisine", "")
        for kk2, vv2 in vv['languages'].items():
            if 'length' in vv2 and kk2 in languages:
                entry[kk2] = vv2['length']
        df_fulltable = df_fulltable.append(entry, ignore_index=True)

    short_voices = []
    for (c_name, c_data) in df_fulltable.iteritems():
        if c_name != 'Cuisine':
            for entry in c_data.iteritems():
                if not pd.isna(entry[1]) and int(
                        entry[1]) < threshold_min_voice_length:
                    short_voices.append((c_name, entry[0], entry[1]))

    for entry in short_voices:
        df_fulltable.at[entry[1], entry[0]] = np.nan

    # TODO:Fix: depending on the order different results are obtained
    # Keep all languages that have at least THRESHOLD_MIN_CUISINES cuisines written
    df_fulltable.dropna(axis=1, thresh=threshold_min_cuisines, inplace=True)
    # Keep all cuisines that appear in at least THRESHOLD_MIN_LANGUAGES languages
    df_fulltable = df_fulltable[df_fulltable.isnull().sum(
        axis=1) < len(df_fulltable.columns) - threshold_min_languages]

    df_fulltable.reset_index(drop=True, inplace=True)
    df_fulltable.set_index('Cuisine', inplace=True)
    df_fulltable.columns.names = ['Wikipedia language']

    save_to_file(filename, df_fulltable)
Example #21
    def test_rating_normalization(self):
        Y = utils.load_from_file('data/Y.bin')[:10, :10]
        R = utils.load_from_file('data/R.bin')[:10, :10]
        Ynorm, Ymean = utils.normalize_ratings(Y, R)
        Ymean_target = np.array([4.2, 3, 4, 4, 3, 5, 3.66666667, 3.33333333, 4.5, 3])
        np.testing.assert_almost_equal(Ymean, Ymean_target, decimal=2)
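utils.normalize_ratings is not shown either; the Ymean values checked above are per-movie means taken over rated entries only (R == 1). The sketch below follows that convention and the (Ynorm, Ymean) return order used in the test; the handling of completely unrated movies is an assumption.

import numpy as np


def normalize_ratings(Y, R):
    # Subtract each movie's mean rating, computed over rated entries only
    num_movies = Y.shape[0]
    Ymean = np.zeros(num_movies)
    Ynorm = np.zeros(Y.shape)
    for i in range(num_movies):
        rated = R[i, :] == 1
        if rated.any():
            Ymean[i] = Y[i, rated].mean()
            Ynorm[i, rated] = Y[i, rated] - Ymean[i]
    return Ynorm, Ymean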
Example #22
    def test(self, params):
        print('\n%s: testing...' % datetime.now())
        sys.stdout.flush()

        session = Session(self._graph, self.results_dir, params['model_name'])
        if 'init_step' not in params or params['init_step'] is None:
            init_step = session.init_step
        else:
            init_step = params['init_step']

        if 'step_num' not in params or params['step_num'] is None:
            step_num = int(np.ceil(
                float(self.fold_size) / self._batch_size))
        else:
            step_num = params['step_num']

        results_file_name = Tester.RESULTS_FILE + '-' + str(init_step) + '-' + \
                            self.fold_name + '-' + str(step_num) + '.json'
        results_file = os.path.join(self.results_dir, results_file_name)
        if not params['load_results'] or not os.path.isfile(results_file):
            session.init(self._classifier, init_step, params['restoring_file'])
            session.start()
            if init_step == 0:
                print('WARNING: testing an untrained model')
            total_step_num = step_num * params['epoch_num']
            test_num = total_step_num * self._batch_size
            print('%s: test_num=%d' %
                  (datetime.now(), step_num * self._batch_size))
            print('%s: epoch_num=%d' % (datetime.now(), params['epoch_num']))

            results = {}
            results['losses'] = np.zeros(test_num, dtype=np.float32)
            results['probs'] = np.zeros((test_num, Reader.CLASSES_NUM),
                                        dtype=np.float32)
            results['labels'] = np.zeros(test_num, dtype=np.int64)

            start_time = time.time()
            for step in range(total_step_num):
                #print('%s: eval_iter=%d' %(datetime.now(), i))
                loss_batch, prob_batch, label_batch = session.run([
                    self._cross_entropy_losses, self._probs,
                    self._input['labels']
                ])
                begin = step * self._batch_size
                results['losses'][begin:begin + self._batch_size] = loss_batch
                results['probs'][begin:begin +
                                 self._batch_size, :] = prob_batch
                results['labels'][begin:begin + self._batch_size] = label_batch
                if (step + 1) % step_num == 0:
                    print "Epoch num: %d" % ((step + 1) / step_num)
                if session.should_stop():
                    break

            duration = time.time() - start_time
            print('%s: duration = %.1f sec' %
                  (datetime.now(), float(duration)))
            sys.stdout.flush()
            if self.writer is not None:
                summary_str = session.run(self._all_summaries)
                self.writer.write_summaries(summary_str, init_step)

            session.stop()
        else:
            print('WARNING: using precomputed results')
            results = utils.load_from_file(results_file)

        results['loss'] = np.mean(results['losses']).item()
        results = self.get_all_stats(results)
        if self.writer is not None and not params['load_results']:
            self.writer.write_scalars(
                {
                    'losses/testing/cross_entropy_loss': results['loss'],
                    'accuracy': results['accuracy']
                }, init_step)
        utils.dump_to_file(results, results_file)

        return init_step, results['loss']
Example #23
# Import python modules
from sys import path
from os.path import join

# Import blender modules
import bpy

# Import plastey modules
path.insert(0, '.')
from utils import load_from_file, name_of_vertex
from const import INT_PERMANENT_FOLDER, OBJ_GEOMETRY

#------------------------------------------------------------------------------#
FILE_NAME = '.bz2'
SURF_TYPE = 0 # plane=0, sphere=1


#------------------------------------------------------------------------------#
coords = load_from_file(join(INT_PERMANENT_FOLDER, FILE_NAME))
try:
    # Adjust locations of the dots
    for i, coord in enumerate(zip(*(iter(coords),)*3)):
        bpy.data.objects[name_of_vertex(i)].location = coord

    # Deselect everything
    bpy.ops.object.select_all(action='DESELECT')
    # Get and select the surface object
    surface = bpy.data.objects[OBJ_GEOMETRY]
    surface.select = True
    bpy.context.scene.objects.active = surface

    # If surface is a plane
    if not SURF_TYPE:
        modifier = surface.modifiers.new('Solidify', 'SOLIDIFY')
Example #24
import pyopencl as cl
import pyopencl.array as cl_array
import utils
import numpy as np
import math
import time

FILE_PATH = './data/data_clean.json'
COUNT_RUN = 5

mem_flags = cl.mem_flags

if __name__ == '__main__':
    clusters = utils.load_from_file(FILE_PATH)['clusters']
    allData = []
    clusterInfo = []
    countData = 0

    for c in clusters:
        countData += len(c)
        for member in c:
            allData.append(member)
        clusterInfo.append(len(c))

    data = np.array(allData, np.float32)
    data_len = len(allData)
    vec_size = len(data[0])
    clusterInfoBuff = np.array(clusterInfo, np.int32)

    # create empty matrix
    matrix = np.zeros(data_len**2, np.float32)
Example #25
def step5_create_plots(df, df_full):
    """Produce and store graphs/plots"""
    figures = {}
    pd.options.plotting.backend = 'plotly'

    # Prepare data frames
    df = df.transpose()
    df_full = df_full.drop(['cuisine'], axis=1)

    # Create heatmap
    fig_hm = create_heatmap(df, defs.X_ADD_FLAGS,
                            defs.MARKER_ON_DIAGONAL_CELLS)
    figures['correlation_heatmap'] = fig_hm

    # Create full heatmap
    if defs.PRODUCE_FULL_HEATMAP:
        fig_hm_full = create_heatmap(df_full.transpose(), False, False, True)
        figures['correlation_heatmap_full'] = fig_hm_full

    # Create statistics graphs
    fig_sum_cuisines = create_bar_sum_cuisines(df_full)
    figures['cumulative_cuisines_length'] = fig_sum_cuisines
    fig_sum_languages = create_bar_sum_languages(df_full)
    figures['cumulative_languages_length'] = fig_sum_languages

    # Create histogram
    if defs.PRODUCE_HISTOGRAM:
        fig_hist = df_full.hist()
        figures['histogram'] = fig_hist

    # Create statistics
    if defs.STORE_STATISTICS:
        pd.set_option('display.float_format', '{:.0f}'.format)
        pd.set_option('display.max_rows', None)
        pd.set_option('display.max_colwidth', None)
        # Cuisine leaderboard
        sum_data = df_full.transpose().sum().astype(int)
        leaderboard = sum_data.to_frame('length').sort_values(
            'length', ascending=False)[0:30]
        leaderboard.index = [
            f"{flag} {cuisine}" for flag, cuisine in zip(
                get_flags_from_demonyms(leaderboard.index),
                leaderboard.index.to_list())
        ]
        with open(Path(f'results/cuisines_leaderboard.md'), 'w') as fp:
            fp.write(leaderboard.to_markdown())
        # Top voices
        cc2 = load_from_file('data/cuisines_langs.dat')
        df_topvoices = pd.DataFrame(
            columns=['cuisine', 'language', 'length', 'url'])
        # yapf: disable
        for cuisine, rw in df_full.iterrows():
            for lang, length in rw.to_frame('length').sort_values('length',ascending=False)[0:3]['length'].iteritems():
                if not np.isnan(length):
                    df_topvoices = df_topvoices.append({'cuisine': cuisine,
                                                        'language': lang,
                                                        'length': length,},
                                                       ignore_index=True)
        # yapf: enable
        df_topvoices = df_topvoices.sort_values('length',
                                                ascending=False)[0:10]
        df_topvoices.reset_index(drop=True, inplace=True)
        urls = {}
        for idx, row in df_topvoices.iterrows():
            wikipage = cc2[f'{row["cuisine"]} cuisine']['languages'][
                row['language']]
            if row['language'] == 'en':
                wikiurl = 'en.wikipedia.org'
            else:
                wikiurl = wikipage['wiki_url']
            urls[idx] = (f'[{row["cuisine"]} cuisine ({row["language"]})]'
                         f'(https://{wikiurl}/wiki/'
                         f'{wikipage["title"].replace(" ", "_")})')
        for kk, vv in urls.items():
            df_topvoices['url'][kk] = vv
        df_topvoices['cuisine'] = [
            f"{flag} {cuisine}" for flag, cuisine in zip(
                get_flags_from_demonyms(df_topvoices['cuisine']),
                df_topvoices['cuisine'].to_list())
        ]
        df_topvoices['language'] = get_languages_names(
            df_topvoices['language'])

        with open(Path(f'results/cuisines_top.md'), 'w') as fp:
            fp.write(df_topvoices.to_markdown())

    # Show plots in-browser
    if defs.SHOW_RESULTS:
        for fig_name, fig in figures.items():
            fig.show()

    # Store results (html/images)
    Path('results').mkdir(parents=True, exist_ok=True)
    for fig_name, fig in figures.items():
        if defs.STORE_HTML:
            with open(Path(f'results/{fig_name}.html'), 'w+') as fp:
                fp.write(fig.to_html())
        if defs.STORE_IMAGE:
            # Remove axes titles for image
            fig.update_layout(xaxis={'title': {
                'text': ''
            }},
                              yaxis={'title': {
                                  'text': ''
                              }})
            with open(Path(f'results/{fig_name}.jpg'), 'wb+') as fp:
                fp.write(
                    fig.to_image(format='jpg',
                                 width=1920,
                                 height=1080,
                                 scale=2.0))
Example #26
    def recover_from_auto_save(self):
        self._auto_save_time = self._origo[PROP_TEXT_TIMER]
        self._surface.deserialise(load_from_file(INT_AUTO_SAVE_FILE))
        print('[OKAY] file has been recovered from:', INT_AUTO_SAVE_FILE)
Example #27
    def __init__(self):
        self.stop_words = set(stopwords.words('english'))
        self.NB_pipeline = utils.load_from_file("nb_pipeline.pkl")
Example #28
  '''
    dates = list(member_data.keys())
    dates.sort()
    if len(dates) > 1:
        if len(member_data[dates[-1]]) > len(member_data[dates[-2]]):
            return True
    return False


if __name__ == "__main__":
    # Test sum donation
    dstr = "2017-09-01"
    print("date str of {} is {}.".format(dstr, str2date(dstr)))
    #
    fname = os.path.join(utils.get_raw_data_path(), 'member_data.txt')
    member_data = utils.load_from_file(fname)
    sum_donation = get_sum_donations(member_data[max(member_data)])
    print("sum donation is {} dollar".format(sum_donation))

    # Test donations by division
    fname = os.path.join(utils.get_raw_data_path(), 'members_divisions.txt')
    members_divisions = utils.load_from_file(fname)
    fname = os.path.join(utils.get_raw_data_path(), 'ericsson_divisions.txt')
    ericsson_divisions = utils.load_from_file(fname)
    dbd = get_donation_by_division(member_data, members_divisions)
    print(dbd)

    # Test all members divisions
    members_divs = get_all_members_division(member_data, members_divisions)
    print(members_divs)
Example #29
  def test(self, params):
    print('\n%s: testing...' %datetime.now())
    sys.stdout.flush()

    session = Session(self._graph, self.results_dir, params['model_name'])
    if 'init_step' not in params or params['init_step'] is None:
      init_step = session.init_step
    else:
      init_step = params['init_step']

    if 'step_num' not in params or params['step_num'] is None:
      step_num = int(np.ceil(float(self.fold_size) / self._batch_size))
    else:
      step_num = params['step_num']

    results_file_name = Tester.RESULTS_FILE + '-' + str(init_step) + '-' + \
                        self.fold_name + '-' + str(step_num) + '.json'
    results_file = os.path.join(self.results_dir, results_file_name)
    if not params['load_results'] or not os.path.isfile(results_file):
      session.init(self._classifier, init_step, params['restoring_file'])
      session.start()
      if init_step == 0:
        print('WARNING: testing an untrained model')
      total_step_num = step_num * params['epoch_num']
      test_num = total_step_num * self._batch_size
      print('%s: test_num=%d' % (datetime.now(), step_num * self._batch_size))
      print('%s: epoch_num=%d' % (datetime.now(), params['epoch_num']))

      results = {}
      results['losses'] = np.zeros(test_num, dtype=np.float32)
      results['probs'] = np.zeros((test_num, Reader.CLASSES_NUM), dtype=np.float32)
      results['labels'] = np.zeros(test_num, dtype=np.int64)

      start_time = time.time()
      for step in range(total_step_num):
        #print('%s: eval_iter=%d' %(datetime.now(), i))
        loss_batch, prob_batch, label_batch = session.run(
          [self._cross_entropy_losses, self._probs, self._input['labels']]
        )
        begin = step * self._batch_size
        results['losses'][begin:begin+self._batch_size] = loss_batch
        results['probs'][begin:begin+self._batch_size, :] = prob_batch
        results['labels'][begin:begin + self._batch_size] = label_batch
        if (step+1) % step_num == 0:
          print "Epoch num: %d" % ((step+1)/step_num)
        if session.should_stop():
          break

      duration = time.time() - start_time
      print('%s: duration = %.1f sec' %(datetime.now(), float(duration)))
      sys.stdout.flush()
      if self.writer is not None:
        summary_str = session.run(self._all_summaries)
        self.writer.write_summaries(summary_str, init_step)

      session.stop()
    else:
      print('WARNING: using precomputed results')
      results = utils.load_from_file(results_file)

    results['loss'] = np.mean(results['losses']).item()
    results = self.get_all_stats(results)
    if self.writer is not None and not params['load_results']:
      self.writer.write_scalars({'losses/testing/cross_entropy_loss': results['loss'],
                                 'accuracy': results['accuracy']}, init_step)
    utils.dump_to_file(results, results_file)

    return init_step, results['loss']