Beispiel #1
0
 def classify(self, song_df):
     """
     Classify a song against every stored type profile.

     For each group in ``self.types_grouped`` the row whose profile is
     closest to the song's profile wins (minimum for the Manhattan
     distance, maximum for Dice or the default similarity) and that
     row's ``name`` is returned.

     :param song_df: unigram DataFrame of the song to classify
     :return: dict mapping each output type to its best-matching name
     """
     # The song profile does not depend on the candidate row — compute it
     # once instead of rebuilding it inside apply() for every row.
     song_profile = get_profile(song_df, self.N, self.ngram_type)
     if self.similarity == '-m':
         # Manhattan is a distance, so the smallest score wins.  idxmin
         # returns the row *label*, which is what .loc expects
         # (np.argmin yields a position and breaks when a group's index
         # is not 0..n-1).
         return {
             t: group.loc[group.apply(
                 lambda row: manhatten(row['profile'], song_profile),
                 axis=1).idxmin()]['name']
             for t, group in self.types_grouped
         }
     elif self.similarity == '-d':
         return {
             t: group.loc[group.apply(
                 lambda row: dice(row['profile'], song_profile),
                 axis=1).idxmax()]['name']
             for t, group in self.types_grouped
         }
     else:
         return {
             t: group.loc[group.apply(
                 lambda row: similarity(row['profile'], song_profile),
                 axis=1).idxmax()]['name']
             for t, group in self.types_grouped
         }
Beispiel #2
0
def get_profile():
    """Return the profile of the user named in the ``user`` query arg.

    On failure a JSON error payload is returned instead of letting the
    exception bubble up as a 500.
    """
    try:
        username = request.args.get('user')
        return profile.get_profile(username)
    except Exception:
        # `except Exception` instead of a bare `except:` so that
        # SystemExit / KeyboardInterrupt are not silently swallowed.
        return jsonify(success=False,
                       reason='Could not get profile due to a Server Error')
Beispiel #3
0
def test_get_profile_too_many_sports():
    """A sixth sport must trigger a ValueError."""
    six_sports = ('tennis', 'basketball', 'badminton',
                  'baseball', 'volleyball', 'boxing')
    with pytest.raises(ValueError):
        assert get_profile('tim', 36, *six_sports)
 def learn(self, input_data_file):
     """Build per-type n-gram profiles from a training set, pruned with LexRank.

     Reads the training CSV, builds an n-gram profile per song from its
     per-song unigram file, sums song profiles into one profile per output
     value, reduces each profile to its LexRank-important entries, and
     finally groups the profiles by output type into ``self.types_grouped``.

     :param input_data_file: path to a ';'-separated training CSV whose
         first column is the index and whose columns are self.column_names
     """
     df = pd.read_csv(input_data_file, sep=';', index_col=0, names=self.column_names)
     logging.info('Making profiles for songs')
     df['profile'] = df.apply(lambda row: get_profile(pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0), self.N, self.ngram_type), axis=1)  # Make profile for each song in input
     logging.info('Profiles for songs made')
     logging.info('Making profiles for types')
     type_profiles = pd.DataFrame(columns=['type', 'name', 'profile'])
     for output_column in self.output_names:  # For each output type (e.g. Performer, Year,…)…
         grouped = df.groupby(output_column)  # …group the rows by that output type…
         for name, group in grouped:
             profile = group.loc[:, 'profile'].sum()  # …and build a profile for each instance of a type (e.g. profile of a specific performer) by summing the profiles of the songs with that specific output type
             if self.profile_size > 0:
                 # Cap each type profile at the profile_size most frequent entries.
                 profile = Counter(profile.most_common(self.profile_size))
             # NOTE(review): DataFrame.append is deprecated (removed in
             # pandas 2.0) and quadratic — consider collecting dicts and
             # building the frame once.
             type_profiles = type_profiles.append({'type': output_column, 'name': name, 'profile': profile}, ignore_index=True)
     logging.info('Profiles for types made')
     logging.info('Applying LexRank to profiles')
     for idx, row in type_profiles.iterrows():
         # Keep only entries whose LexRank importance is above the mean.
         importance = self.__lexrank(row['profile'], 0.01)
         mask = np.where(importance > np.mean(importance), True, False)
         flat_profile = list(row['profile'].keys())
         reduced_profile = []
         for i, x in enumerate(mask):
             if x:
                 reduced_profile.append(flat_profile[i])
         # NOTE(review): assigning into the row yielded by iterrows() is
         # only guaranteed to write back for object-dtype columns — verify
         # the reduced profiles actually land in type_profiles.
         row['profile'] = Counter(reduced_profile)
     logging.info('LexRank applied to profiles')
     self.types_grouped = type_profiles.groupby('type')  # Group of all performers, group of all years,…
     return
Beispiel #5
0
def main():
    """Run the default latency experiment for a profile/job and ship results.

    argv[1] is the profile name, argv[2] the job id.  Results are tarred
    and scp'd to the collection server, then the local archive is removed.
    """
    profile_name = sys.argv[1]
    job_id = sys.argv[2]
    pf = profile.get_profile(profile_name, job_id)

    # Cleanup existing result directory and create a new one
    result_file_name = strftime('%m-%d-%H%M') + '.tar.gz'

    result_base_path = pf.get_result_base_path()
    # NOTE(review): os.system with %-interpolated shell strings is
    # injection-prone if any path ever contains spaces or shell
    # metacharacters — consider subprocess.run([...], shell=False).
    os.system('mkdir -p %s' % result_base_path)

    repeat = int(pf.config.get('experiment', 'repeat'))

    # Do all experiments here
    experiment_on_latency(pf, int(pf.config.get('experiment', 'default_num_cassandra_nodes')), repeat)
    # experiment_on_throughputs(pf, int(pf.config.get('experiment', 'default_num_cassandra_nodes')), repeat)
    # experiment_on_num_cassandra_nodes_and_throughput(pf, repeat)
    # experiment_on_num_cassandra_nodes_with_no_throughput_limit(pf, repeat)
    # experiment_on_num_ycsb_threads(pf)
    # experiment_on_latency_scalability(pf)

    # experiment_on_pbs(pf, repeat)

    # Copy log to result directory
    os.system('cp %s/bw-cassandra-log-%s.txt %s/bw-cassandra-log.txt' % (pf.config.get('path', 'log_path'), job_id, result_base_path))

    # Archive the result and send to remote server
    os.system('tar -czf /tmp/%s -C %s .'
              % (result_file_name, result_base_path))
    private_key_path = pf.config.get('path', 'base_path')
    os.system('scp -o StrictHostKeyChecking=no -P8888 -i %s/sshuser_key /tmp/%s [email protected]:%s/'
              % (private_key_path, result_file_name, pf.get_name()))
    # Remove the local archive once it has been shipped.
    os.system('rm /tmp/%s' % result_file_name)
Beispiel #6
0
 def learn(self, input_data_file):
     """Make a classifier based on ngram profiles.

     Reads the training CSV, builds an n-gram profile per song from its
     per-song unigram file, sums song profiles into one profile per output
     value, and groups the type profiles into ``self.types_grouped`` for
     later classification.

     :param input_data_file: path to a ';'-separated training CSV
     """
     df = pd.read_csv(input_data_file,
                      sep=';',
                      index_col=0,
                      names=self.column_names)
     logging.info('Making profiles for songs')
     df['profile'] = df.apply(lambda row: get_profile(
         pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0), self
         .N, self.ngram_type),
                              axis=1)  # Make profile for each song in input
     logging.info('Profiles for songs made')
     logging.info('Making profiles for types')
     type_profiles = pd.DataFrame(columns=['type', 'name', 'profile'])
     for output_column in self.output_names:  # For each output type (e.g. Performer, Year,…)…
         grouped = df.groupby(
             output_column)  # …group the rows by that output type…
         for name, group in grouped:
             profile = group.loc[:, 'profile'].sum(
             )  # …and build a profile for each instance of a type (e.g. profile of a specific performer) by summing the profiles of the songs with that specific output type
             if self.profile_size > 0:
                 # Cap each type profile at the profile_size most frequent entries.
                 profile = Counter(profile.most_common(self.profile_size))
             # NOTE(review): DataFrame.append is deprecated (removed in
             # pandas 2.0) and quadratic in the loop — consider collecting
             # dicts and building the frame once.
             type_profiles = type_profiles.append(
                 {
                     'type': output_column,
                     'name': name,
                     'profile': profile
                 },
                 ignore_index=True)
     logging.info('Profiles for types made')
     self.types_grouped = type_profiles.groupby(
         'type')  # Group of all performers, group of all years,…
     return
def main():
    """Run the PBS experiment for a profile/job and ship the results.

    argv[1] is the profile name, argv[2] the job id.  Results are tarred
    and scp'd to the collection server, then the local archive is removed.
    """
    profile_name = sys.argv[1]
    job_id = sys.argv[2]
    pf = profile.get_profile(profile_name, job_id)

    # Cleanup existing result directory and create a new one
    result_file_name = strftime('%m-%d-%H%M') + '.tar.gz'

    result_base_path = pf.get_result_base_path()
    # NOTE(review): plain `mkdir` (no -p) fails silently via os.system if
    # the parent is missing or the directory already exists — confirm the
    # path is always a fresh single level.
    os.system('mkdir %s' % result_base_path)

    repeat = int(pf.config.get('experiment', 'repeat'))

    # Do all experiments here
    # experiment_on_throughputs(pf, int(pf.config.get('experiment', 'default_num_cassandra_nodes')), repeat)
    # experiment_on_num_cassandra_nodes_and_throughput(pf, repeat)
    # experiment_on_num_cassandra_nodes_with_no_throughput_limit(pf, repeat)
    # experiment_on_num_ycsb_threads(pf)
    # experiment_on_latency_scalability(pf)

    experiment_on_pbs(pf, repeat)

    # Copy log to result directory
    os.system('cp %s/bw-cassandra-log-%s.txt %s/bw-cassandra-log.txt' %
              (pf.config.get('path', 'log_path'), job_id, result_base_path))

    # Archive the result and send to remote server
    os.system('tar -czf /tmp/%s -C %s .' %
              (result_file_name, result_base_path))
    private_key_path = pf.config.get('path', 'base_path')
    os.system(
        'scp -o StrictHostKeyChecking=no -P8888 -i %s/sshuser_key /tmp/%s [email protected]:%s/'
        % (private_key_path, result_file_name, pf.get_name()))
    # Remove the local archive once it has been shipped.
    os.system('rm /tmp/%s' % result_file_name)
Beispiel #8
0
def get_profile_api():
    """Return the profile for a session token as JSON, minus the password.

    Responds with "Session Expired" when the token fails validation.
    """
    token = flask.request.args.get('token', '')
    print(token)  # NOTE(review): debug output — prefer logging or remove
    if not token_check(token):
        return "Session Expired"
    # Renamed from `json`: the original local shadowed the stdlib `json`
    # module name, a bug waiting to happen if this function ever needs it.
    profile_data = profile.get_profile(token)
    # Never expose the stored password to the client.
    del profile_data["password"]
    return flask.jsonify(profile=profile_data)
Beispiel #9
0
def embedProfile(profileId):
    """
    Embed the profile.

    Returns a javascript response that does a document.write to insert
    the graphs; yields an implicit None when the profile is not found,
    matching the original control flow.
    """
    found = get_profile(profileId)
    if not found:
        return None
    return Response(create_embed(found),
                    mimetype='application/javascript')
 def test(self, test_data_file):
     """Classify all songs in a test set.

     Builds a profile per song, vectorizes the profiles with the fitted
     DictVectorizer, and runs every learner: classifiers decode their
     predictions back through their LabelEncoder, regressors return
     flattened raw predictions.

     :param test_data_file: path to a ';'-separated test CSV
     :return: DataFrame with one column per output name, one row per song
     """
     df = pd.read_csv(test_data_file, sep=';', index_col=0, names=self.column_names)
     df['profile'] = df.apply(lambda row: get_profile(pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0), self.N, self.ngram_type), axis=1)
     test_input = np.float32(self.v.transform(df['profile']))
     # One array of predictions per learner; zip(*...) below transposes
     # them into one dict per song.
     output_arrays = [learner['label encoder'].inverse_transform(learner['learner'].predict(test_input)) if learner['type'] == 'classifier' else learner['learner'].predict(test_input).flatten() for learner in self.__learners]
     dicts = [dict(zip(self.output_names, row)) for row in zip(*output_arrays)]
     output_df = pd.DataFrame(columns=self.output_names)
     # NOTE(review): DataFrame.append is deprecated (removed in pandas 2.0)
     # — pd.DataFrame(dicts, columns=self.output_names) would suffice.
     output_df = output_df.append(dicts, ignore_index=True)
     return output_df
Beispiel #11
0
def test_get_profile_award():
    """A single award keyword lands under the 'awards' key."""
    result = get_profile("tim", 36, champ="helped out team in crisis")
    assert result == {
        "name": "tim",
        "age": 36,
        "awards": {"champ": "helped out team in crisis"},
    }
Beispiel #12
0
def test_get_profile_award():
    """A single award keyword lands under the 'awards' key."""
    result = get_profile('tim', 36, champ='helped out team in crisis')
    assert result == {
        'name': 'tim',
        'age': 36,
        'awards': {'champ': 'helped out team in crisis'},
    }
Beispiel #13
0
def getProfile(profileId):
    """
    Get the profile JSON.

    The literal id "test" returns the canned test profile; anything else
    is looked up via get_profile.

    Returns: Profile JSON
    """
    payload = test_profile if profileId == "test" else get_profile(profileId)
    return jsonify(payload)
Beispiel #14
0
def test_get_profile_too_many_sports():
    """A sixth sport must trigger a ValueError."""
    six_sports = ("tennis", "basketball", "badminton",
                  "baseball", "volleyball", "boxing")
    with pytest.raises(ValueError):
        assert get_profile("tim", 36, *six_sports)
Beispiel #15
0
def data_prep(seed):
    """Load, merge and split the profile/interest data.

    :param seed: random seed forwarded to the train/test split
    :return: X, y, X_train, y_train, X_test, y_test, ids
    """
    # Bug fix: the original wrote `profile = profile.Profile()` (likewise
    # for `interest` and `preprocess`), which makes each name a local
    # variable and raises UnboundLocalError before the module can be read.
    # Distinct local names leave the module references intact.
    profile_obj = profile.Profile()
    interest_obj = interest.Interest()
    preprocessor = preprocess.Preprocessor()
    profile_raw = profile_obj.get_profile()
    interest_raw, ids = interest_obj.data_merge()
    data = preprocessor.finalize_data(profile_raw, interest_raw)
    X, y, X_train, y_train, X_test, y_test = preprocessor.split_data(data,
                                                                     seed=seed,
                                                                     re=False)
    return X, y, X_train, y_train, X_test, y_test, ids
Beispiel #16
0
def test_get_profile_two_sports_and_one_award():
    """Two positional sports plus one award keyword are all collected."""
    result = get_profile('tim', 36, 'tennis', 'basketball',
                         champ='helped out team in crisis')
    assert result == {
        'name': 'tim',
        'age': 36,
        'sports': ['basketball', 'tennis'],
        'awards': {'champ': 'helped out team in crisis'},
    }
Beispiel #17
0
def test_get_profile_two_sports_and_one_award():
    """Two positional sports plus one award keyword are all collected."""
    result = get_profile("tim", 36, "tennis", "basketball",
                         champ="helped out team in crisis")
    assert result == {
        "name": "tim",
        "age": 36,
        "sports": ["basketball", "tennis"],
        "awards": {"champ": "helped out team in crisis"},
    }
Beispiel #18
0
def order_info(request):
    """Show and update the user's order-information profile.

    POST: validate the submitted UserProfileForm and, when valid, persist
    it via profile.set_profile and redirect to the 'my_account' view.
    GET: prefill the form from the stored profile.
    """
    if request.method == 'POST':
        post_data = request.POST.copy()
        form = UserProfileForm(None, post_data)
        if form.is_valid():
            profile.set_profile(request)
            url = urlresolvers.reverse('my_account')
            return HttpResponseRedirect(url)
    else:
        user_profile = profile.get_profile(request)
        form = UserProfileForm(email_from_user=request.user.email,
                               instance=user_profile)
    page_title = 'Edit Order Information'
    # An invalid POST falls through to here with `form` still bound to the
    # submitted data, so validation errors are re-rendered on this page.
    return render_to_response("registration/order_info.html",
                              locals(),
                              context_instance=RequestContext(request))
 def learn(self, input_data_file):
     """Make a classifier based on ngram profiles.

     Reads the training CSV, builds an n-gram profile per song from its
     per-song unigram file, sums song profiles into one profile per output
     value, and groups the type profiles into ``self.types_grouped``.

     :param input_data_file: path to a ';'-separated training CSV
     """
     df = pd.read_csv(input_data_file, sep=';', index_col=0, names=self.column_names)
     logging.info('Making profiles for songs')
     df['profile'] = df.apply(lambda row: get_profile(pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0), self.N, self.ngram_type), axis=1)  # Make profile for each song in input
     logging.info('Profiles for songs made')
     logging.info('Making profiles for types')
     # Collect the records first and build the DataFrame once: repeated
     # DataFrame.append was quadratic and is removed in pandas >= 2.0.
     records = []
     for output_column in self.output_names:  # For each output type (e.g. Performer, Year,…)…
         grouped = df.groupby(output_column)  # …group the rows by that output type…
         for name, group in grouped:
             # Build a profile for each value of the type (e.g. one
             # specific performer) by summing its songs' profiles.
             profile = group.loc[:, 'profile'].sum()
             if self.profile_size > 0:
                 # Cap the profile at the profile_size most frequent entries.
                 profile = Counter(profile.most_common(self.profile_size))
             records.append({'type': output_column, 'name': name, 'profile': profile})
     type_profiles = pd.DataFrame(records, columns=['type', 'name', 'profile'])
     logging.info('Profiles for types made')
     self.types_grouped = type_profiles.groupby('type')  # Group of all performers, group of all years,…
     return
Beispiel #20
0
def test_get_profile_two_sports_and_three_awards():
    """Two sports and three award keywords are all collected."""
    result = get_profile('tim', 36, 'tennis', 'basketball',
                         service='going the extra mile for our customers',
                         champ='helped out the team in crisis',
                         attitude='unbeatable positive + uplifting')
    expected_awards = {
        'champ': 'helped out the team in crisis',
        'service': 'going the extra mile for our customers',
        'attitude': 'unbeatable positive + uplifting',
    }
    assert result == {'name': 'tim',
                      'age': 36,
                      'sports': ['basketball', 'tennis'],
                      'awards': expected_awards}
Beispiel #21
0
def wish_list(request):
    """Render the current user's wish list as a paginated product list.

    When the wish list is non-empty the usual ordering / per-page /
    brand-filter toolbar is applied; otherwise an empty paginator is
    built and the toolbar is hidden.
    """
    user_profile = profile.get_profile(request)
    products = user_profile.wish_list.all()
    title_head = "Tu lista de deseos"
    if products:
        # Apply the user's ordering, page-size and brand filters in turn.
        products, order_by_form = order_products(request, products)
        num_x_pag, product_per_pag_form = get_num_x_pag(request)
        products, order_by_brand_form = filter_products(request, products)
        paginator, products_per_pag = get_paginator(request, products,
                                                    num_x_pag)
        show_toolbar = True
    else:
        show_toolbar = False
        paginator, products_per_pag = get_paginator(request, [], 1)
    product_row = get_product_row(products_per_pag)

    # NOTE(review): the template reads its context via locals(), so every
    # local above (forms, paginator, flags) is part of the page contract.
    return render_to_response("tags/product_list.html",
                              locals(),
                              context_instance=RequestContext(request))
Beispiel #22
0
def test_get_profile_two_sports_and_three_awards():
    """Two sports and three award keywords are all collected."""
    result = get_profile(
        "tim",
        36,
        "tennis",
        "basketball",
        service="going the extra mile for our customers",
        champ="helped out the team in crisis",
        attitude="unbeatable positive + uplifting",
    )
    expected_awards = {
        "champ": "helped out the team in crisis",
        "service": "going the extra mile for our customers",
        "attitude": "unbeatable positive + uplifting",
    }
    assert result == {"name": "tim",
                      "age": 36,
                      "sports": ["basketball", "tennis"],
                      "awards": expected_awards}
Beispiel #23
0
def main():
    """Run the packet-latency experiment across all hosts of a profile.

    argv[1] is the profile name, argv[2] the job id.  For each packet
    size one RunExperimentThread is started per host, and all threads are
    joined before moving on to the next packet size.
    """
    profile_name = sys.argv[1]
    job_id = sys.argv[2]
    pf = profile.get_profile(profile_name, job_id)

    packet_sizes = [64, 1024, 4096, 16384]
    all_hosts = pf.get_hosts()

    result_base_path = '/projects/sciteam/jsb/ghosh1/latency/%s' % job_id

    for packet_size in packet_sizes:
        logger.debug('Running experiment for packet_size: %d' % packet_size)
        threads = []
        # Start one experiment thread per host.  (The old comment said
        # "Kill cassandra on all hosts" — stale; these threads run the
        # latency experiment.)
        for host in all_hosts:
            current_thread = RunExperimentThread(result_base_path, host, packet_size, all_hosts)
            threads.append(current_thread)
            current_thread.start()

        # Wait for every host to finish this packet size before the next.
        for t in threads:
            t.join()
Beispiel #24
0
def main():
    """Run the packet-latency experiment across all hosts of a profile.

    argv[1] is the profile name, argv[2] the job id.  For each packet
    size one RunExperimentThread is started per host, and all threads are
    joined before moving on to the next packet size.
    """
    profile_name = sys.argv[1]
    job_id = sys.argv[2]
    pf = profile.get_profile(profile_name, job_id)

    packet_sizes = [64, 1024, 4096, 16384]
    all_hosts = pf.get_hosts()

    result_base_path = '/projects/sciteam/jsb/ghosh1/latency/%s' % job_id

    for packet_size in packet_sizes:
        logger.debug('Running experiment for packet_size: %d' % packet_size)
        threads = []
        # Start one experiment thread per host.  (The old comment said
        # "Kill cassandra on all hosts" — stale; these threads run the
        # latency experiment.)
        for host in all_hosts:
            current_thread = RunExperimentThread(result_base_path, host,
                                                 packet_size, all_hosts)
            threads.append(current_thread)
            current_thread.start()

        # Wait for every host to finish this packet size before the next.
        for t in threads:
            t.join()
 def learn(self, input_data_file):
     """Train learners using profiles of songs.

     Builds an n-gram profile per song, vectorizes the profiles with a
     DictVectorizer, and fits one learner per output column: a regressor
     for the numeric outputs ('Year', 'Tempo'), otherwise a classifier
     paired with a LabelEncoder.

     :param input_data_file: path to a ';'-separated training CSV
     """
     df = pd.read_csv(input_data_file, sep=';', index_col=0, names=self.column_names)
     logging.info('Making profiles for songs')
     df['profile'] = df.apply(lambda row: get_profile(pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0), self.N, self.ngram_type), axis=1)  # Make profile for each song in input
     self.v = DictVectorizer(sparse=False)
     self.v.fit(df['profile'])
     training_input = self.v.transform(df['profile'])
     logging.info('Made profiles, now making and fitting learners')
     for output_name in self.output_names:
         training_output = np.array(df[output_name])
         if output_name in ['Year', 'Tempo']:
             regressor = self.get_regressor(self.regressor_type, output_name, training_input.shape[1], 1, self.regressor_args)
             regressor.fit(np.float32(training_input), np.float32(training_output))
             self.__learners.append({'output name': output_name, 'learner': regressor, 'type': 'regressor'})
         else:
             label_encoder = LabelEncoder()
             labels = label_encoder.fit_transform(training_output)
             classifier = self.get_classifier(self.classifier_type, output_name, training_input.shape[1], len(np.unique(training_output)), self.classifier_args)
             classifier.fit(np.float32(training_input), np.int32(labels))
             # Bug fix: the key was misspelled 'ouput name', which makes
             # any consumer reading learner['output name'] raise KeyError
             # for classifier entries.
             self.__learners.append({'output name': output_name, 'label encoder': label_encoder, 'learner': classifier, 'type': 'classifier'})
     logging.info('Made and fit learners')
     return
Beispiel #26
0
def get_profile(id):
    """Serialize and return the profile with the given id."""
    # `id` shadows the builtin but is part of the public signature, so it
    # is kept unchanged for callers using keyword arguments.
    data = profile.get_profile(id)
    return ProfileSerializer(data).data
Beispiel #27
0
 def __get__(self, request, obj_type=None):
     """Lazily fetch the profile and memoize it on the request object."""
     try:
         # Fast path: already queried during this request.
         return request._cached_profile
     except AttributeError:
         # First access: query once and cache on the request (hasattr in
         # the original is the same AttributeError probe).
         from profile import get_profile
         request._cached_profile = get_profile(request, self.app_id)
         return request._cached_profile
Beispiel #28
0
def test_get_profile_no_age():
    """Omitting the required age argument raises TypeError."""
    with pytest.raises(TypeError):
        assert get_profile('tim')
Beispiel #29
0
def test_get_profile_one_sport():
    """A single sport is stored as a one-element list."""
    result = get_profile("tim", 36, "tennis")
    assert result == {"name": "tim", "age": 36, "sports": ["tennis"]}
Beispiel #30
0
def test_get_profile_no_name():
    """Calling with no arguments at all raises TypeError."""
    with pytest.raises(TypeError):
        assert get_profile()
Beispiel #31
0
def test_get_profile_two_sports():
    """Two sports come back as ['basketball', 'tennis']."""
    result = get_profile("tim", 36, "tennis", "basketball")
    assert result == {"name": "tim", "age": 36,
                      "sports": ["basketball", "tennis"]}
Beispiel #32
0
def test_get_profile_two_sports():
    """Two sports come back as ['basketball', 'tennis']."""
    result = get_profile('tim', 36, 'tennis', 'basketball')
    assert result == {'name': 'tim', 'age': 36,
                      'sports': ['basketball', 'tennis']}
Beispiel #33
0
def test_get_profile_one_sport():
    """A single sport is stored as a one-element list."""
    result = get_profile('tim', 36, 'tennis')
    assert result == {'name': 'tim', 'age': 36, 'sports': ['tennis']}
 def test_mod_profile(self):
     """Modifying the profile must change the stored name."""
     profile.mod_profile(self.modprofile)
     updated = profile.get_profile(self.token)
     self.assertEqual(updated["name"], "altro")
Beispiel #35
0
def test_get_profile_dict():
    """Name and age alone produce a two-key dict."""
    result = get_profile("tim", 36)
    assert result == {"name": "tim", "age": 36}
Beispiel #36
0
def test_get_profile_valueerror():
    """A non-integer age raises ValueError."""
    with pytest.raises(ValueError):
        assert get_profile("tim", "nonint")
Beispiel #37
0
def test_get_profile_dict():
    """Name and age alone produce a two-key dict."""
    result = get_profile('tim', 36)
    assert result == {'name': 'tim', 'age': 36}
Beispiel #38
0
def test_get_profile_valueerror():
    """A non-integer age raises ValueError."""
    with pytest.raises(ValueError):
        assert get_profile('tim', 'nonint')
 def classify(self, song_df):
     """Classify a song against every stored type profile.

     For each group in ``self.types_grouped`` the row whose profile is
     most similar to the song's profile wins, and that row's ``name``
     is returned.

     :param song_df: unigram DataFrame of the song to classify
     :return: dict mapping each output type to its best-matching name
     """
     # The song profile is row-invariant: compute it once instead of
     # rebuilding it inside apply() for every candidate row.  idxmax
     # returns the row *label* expected by .loc (np.argmax returns a
     # position and breaks on non-positional group indices).
     song_profile = get_profile(song_df, self.N, self.ngram_type)
     return {
         t: group.loc[group.apply(
             lambda row: similarity(row['profile'], song_profile),
             axis=1).idxmax()]['name']
         for t, group in self.types_grouped
     }
 def classify(self, song_df):
     """Classify a specific song: one prediction per trained learner."""
     song_profile = get_profile(song_df, self.N, self.ngram_type)
     test_input = np.float32(self.v.transform(song_profile))
     predictions = {}
     for learner in self.__learners:
         raw = learner['learner'].predict(test_input)
         if learner['type'] == 'classifier':
             # Decode class indices back to their original labels.
             predictions[learner['output name']] = \
                 learner['label encoder'].inverse_transform(raw)
         else:
             predictions[learner['output name']] = raw
     return predictions