def classify(self, song_df): """ Classifier checks for each value's profile of output_column which is most similar to the song's profile (by an argmax in each group of types_grouped) """ if self.similarity == '-m': return { t: group.loc[np.argmin( group.apply(lambda row: manhatten( row['profile'], get_profile(song_df, self.N, self.ngram_type)), axis=1))]['name'] for t, group in self.types_grouped } elif self.similarity == '-d': return { t: group.loc[np.argmax( group.apply(lambda row: dice( row['profile'], get_profile(song_df, self.N, self.ngram_type)), axis=1))]['name'] for t, group in self.types_grouped } else: return { t: group.loc[np.argmax( group.apply(lambda row: similarity( row['profile'], get_profile(song_df, self.N, self.ngram_type)), axis=1))]['name'] for t, group in self.types_grouped }
def get_profile():
    try:
        username = request.args.get('user')
        return profile.get_profile(username)
    except Exception:  # a bare except would also swallow SystemExit/KeyboardInterrupt
        return jsonify(success=False,
                       reason='Could not get profile due to a Server Error')
def test_get_profile_too_many_sports():
    with pytest.raises(ValueError):
        sports = [
            'tennis', 'basketball', 'badminton', 'baseball', 'volleyball',
            'boxing'
        ]
        assert get_profile('tim', 36, *sports)
def learn(self, input_data_file):
    df = pd.read_csv(input_data_file, sep=';', index_col=0,
                     names=self.column_names)
    logging.info('Making profiles for songs')
    # Make a profile for each song in the input
    df['profile'] = df.apply(
        lambda row: get_profile(
            pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0),
            self.N, self.ngram_type),
        axis=1)
    logging.info('Profiles for songs made')
    logging.info('Making profiles for types')
    type_profiles = pd.DataFrame(columns=['type', 'name', 'profile'])
    for output_column in self.output_names:  # For each output type (e.g. Performer, Year,…)…
        grouped = df.groupby(output_column)  # …group the rows by that output type…
        for name, group in grouped:
            # …and build a profile for each instance of a type (e.g. the profile
            # of a specific performer) by summing the profiles of the songs with
            # that specific output value
            profile = group.loc[:, 'profile'].sum()
            if self.profile_size > 0:
                # most_common() returns (key, count) pairs; rebuild via dict,
                # since Counter over the raw pairs would count the tuples themselves
                profile = Counter(dict(profile.most_common(self.profile_size)))
            type_profiles = type_profiles.append(
                {'type': output_column, 'name': name, 'profile': profile},
                ignore_index=True)
    logging.info('Profiles for types made')
    logging.info('Applying LexRank to profiles')
    for idx, row in type_profiles.iterrows():
        importance = self.__lexrank(row['profile'], 0.01)
        mask = np.where(importance > np.mean(importance), True, False)
        flat_profile = list(row['profile'].keys())
        reduced_profile = [flat_profile[i] for i, keep in enumerate(mask) if keep]
        # iterrows() yields copies, so write back through the DataFrame itself
        type_profiles.at[idx, 'profile'] = Counter(reduced_profile)
    logging.info('LexRank applied to profiles')
    # Group of all performers, group of all years,…
    self.types_grouped = type_profiles.groupby('type')
    return
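# get_profile(song_df, N, ngram_type) is used throughout this listing but not
# defined in it. A minimal sketch, assuming a profile is a Counter of N-grams
# built from the song's token stream; the use of the first column and the
# handling of ngram_type are assumptions for illustration:
def get_profile(song_df, N, ngram_type):
    # ngram_type presumably selects the unit (e.g. word vs. character ngrams);
    # this sketch just builds N-grams from the first column of song_df
    tokens = list(song_df.iloc[:, 0])
    return Counter(zip(*(tokens[i:] for i in range(N))))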
def main():
    profile_name = sys.argv[1]
    job_id = sys.argv[2]
    pf = profile.get_profile(profile_name, job_id)

    # Cleanup existing result directory and create a new one
    result_file_name = strftime('%m-%d-%H%M') + '.tar.gz'
    result_base_path = pf.get_result_base_path()
    os.system('mkdir -p %s' % result_base_path)

    repeat = int(pf.config.get('experiment', 'repeat'))

    # Do all experiments here
    experiment_on_latency(pf, int(pf.config.get('experiment', 'default_num_cassandra_nodes')), repeat)
    # experiment_on_throughputs(pf, int(pf.config.get('experiment', 'default_num_cassandra_nodes')), repeat)
    # experiment_on_num_cassandra_nodes_and_throughput(pf, repeat)
    # experiment_on_num_cassandra_nodes_with_no_throughput_limit(pf, repeat)
    # experiment_on_num_ycsb_threads(pf)
    # experiment_on_latency_scalability(pf)
    # experiment_on_pbs(pf, repeat)

    # Copy log to result directory
    os.system('cp %s/bw-cassandra-log-%s.txt %s/bw-cassandra-log.txt' %
              (pf.config.get('path', 'log_path'), job_id, result_base_path))

    # Archive the result and send to remote server
    os.system('tar -czf /tmp/%s -C %s .' % (result_file_name, result_base_path))
    private_key_path = pf.config.get('path', 'base_path')
    os.system('scp -o StrictHostKeyChecking=no -P8888 -i %s/sshuser_key /tmp/%s [email protected]:%s/' %
              (private_key_path, result_file_name, pf.get_name()))
    os.system('rm /tmp/%s' % result_file_name)
def learn(self, input_data_file): """Make a classifier based on ngram profiles""" df = pd.read_csv(input_data_file, sep=';', index_col=0, names=self.column_names) logging.info('Making profiles for songs') df['profile'] = df.apply(lambda row: get_profile( pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0), self .N, self.ngram_type), axis=1) # Make profile for each song in input logging.info('Profiles for songs made') logging.info('Making profiles for types') type_profiles = pd.DataFrame(columns=['type', 'name', 'profile']) for output_column in self.output_names: # For each output type (e.g. Performer, Year,…)… grouped = df.groupby( output_column) # …group the rows by that output type… for name, group in grouped: profile = group.loc[:, 'profile'].sum( ) # …and build a profile for each instance of a type (e.g. profile of a specific performer) by summing the profiles of the songs with that specific output type if self.profile_size > 0: profile = Counter(profile.most_common(self.profile_size)) type_profiles = type_profiles.append( { 'type': output_column, 'name': name, 'profile': profile }, ignore_index=True) logging.info('Profiles for types made') self.types_grouped = type_profiles.groupby( 'type') # Group of all performers, group of all years,… return
def main():
    profile_name = sys.argv[1]
    job_id = sys.argv[2]
    pf = profile.get_profile(profile_name, job_id)

    # Cleanup existing result directory and create a new one
    result_file_name = strftime('%m-%d-%H%M') + '.tar.gz'
    result_base_path = pf.get_result_base_path()
    os.system('mkdir %s' % result_base_path)

    repeat = int(pf.config.get('experiment', 'repeat'))

    # Do all experiments here
    # experiment_on_throughputs(pf, int(pf.config.get('experiment', 'default_num_cassandra_nodes')), repeat)
    # experiment_on_num_cassandra_nodes_and_throughput(pf, repeat)
    # experiment_on_num_cassandra_nodes_with_no_throughput_limit(pf, repeat)
    # experiment_on_num_ycsb_threads(pf)
    # experiment_on_latency_scalability(pf)
    experiment_on_pbs(pf, repeat)

    # Copy log to result directory
    os.system('cp %s/bw-cassandra-log-%s.txt %s/bw-cassandra-log.txt' %
              (pf.config.get('path', 'log_path'), job_id, result_base_path))

    # Archive the result and send to remote server
    os.system('tar -czf /tmp/%s -C %s .' % (result_file_name, result_base_path))
    private_key_path = pf.config.get('path', 'base_path')
    os.system('scp -o StrictHostKeyChecking=no -P8888 -i %s/sshuser_key /tmp/%s [email protected]:%s/' %
              (private_key_path, result_file_name, pf.get_name()))
    os.system('rm /tmp/%s' % result_file_name)
def get_profile_api():
    token = flask.request.args.get('token', '')
    print(token)  # debug output; logging would be preferable here
    if not token_check(token):
        return "Session Expired"
    data = profile.get_profile(token)  # avoid naming this `json`, which shadows the stdlib module
    del data["password"]  # never expose the password field to clients
    return flask.jsonify(profile=data)
def embedProfile(profileId):
    """
    Embed the profile.
    Returns javascript file that does a document.write to insert the graphs.
    """
    profile = get_profile(profileId)
    if profile:
        return Response(create_embed(profile),
                        mimetype='application/javascript')
    # Note: falls through to an implicit None (an error response in Flask)
    # when the profile does not exist.
def test(self, test_data_file):
    """Classify all songs in a test set"""
    df = pd.read_csv(test_data_file, sep=';', index_col=0,
                     names=self.column_names)
    df['profile'] = df.apply(
        lambda row: get_profile(
            pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0),
            self.N, self.ngram_type),
        axis=1)
    test_input = np.float32(self.v.transform(df['profile']))
    output_arrays = [
        learner['label encoder'].inverse_transform(
            learner['learner'].predict(test_input))
        if learner['type'] == 'classifier'
        else learner['learner'].predict(test_input).flatten()
        for learner in self.__learners
    ]
    dicts = [dict(zip(self.output_names, row)) for row in zip(*output_arrays)]
    output_df = pd.DataFrame(columns=self.output_names)
    output_df = output_df.append(dicts, ignore_index=True)
    return output_df
def test_get_profile_award():
    expected = {
        "name": "tim",
        "age": 36,
        "awards": {"champ": "helped out team in crisis"},
    }
    assert get_profile("tim", 36, champ="helped out team in crisis") == expected
def getProfile(profileId):
    """
    Get the profile JSON

    Returns: Profile JSON
    """
    if profileId == "test":
        return jsonify(test_profile)
    else:
        return jsonify(get_profile(profileId))
def data_prep(seed):
    # Local names must not shadow the imported profile/interest/preprocess
    # modules; assigning to those names here would make them local and raise
    # UnboundLocalError on the right-hand side.
    prof = profile.Profile()
    inter = interest.Interest()
    prep = preprocess.Preprocessor()
    profile_raw = prof.get_profile()
    interest_raw, ids = inter.data_merge()
    data = prep.finalize_data(profile_raw, interest_raw)
    X, y, X_train, y_train, X_test, y_test = prep.split_data(data, seed=seed, re=False)
    return X, y, X_train, y_train, X_test, y_test, ids
def test_get_profile_two_sports_and_one_award():
    expected = {
        'name': 'tim',
        'age': 36,
        'sports': ['basketball', 'tennis'],
        'awards': {'champ': 'helped out team in crisis'}
    }
    assert get_profile('tim', 36, 'tennis', 'basketball',
                       champ='helped out team in crisis') == expected
def order_info(request):
    if request.method == 'POST':
        post_data = request.POST.copy()
        form = UserProfileForm(None, post_data)
        if form.is_valid():
            profile.set_profile(request)
            url = urlresolvers.reverse('my_account')
            return HttpResponseRedirect(url)
    else:
        user_profile = profile.get_profile(request)
        form = UserProfileForm(email_from_user=request.user.email,
                               instance=user_profile)
    page_title = 'Edit Order Information'
    return render_to_response("registration/order_info.html", locals(),
                              context_instance=RequestContext(request))
def learn(self, input_data_file): """Make a classifier based on ngram profiles""" df = pd.read_csv(input_data_file, sep=';', index_col=0, names=self.column_names) logging.info('Making profiles for songs') df['profile'] = df.apply(lambda row: get_profile(pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0), self.N, self.ngram_type), axis=1) # Make profile for each song in input logging.info('Profiles for songs made') logging.info('Making profiles for types') type_profiles = pd.DataFrame(columns=['type', 'name', 'profile']) for output_column in self.output_names: # For each output type (e.g. Performer, Year,…)… grouped = df.groupby(output_column) # …group the rows by that output type… for name, group in grouped: profile = group.loc[:, 'profile'].sum() # …and build a profile for each instance of a type (e.g. profile of a specific performer) by summing the profiles of the songs with that specific output type if self.profile_size > 0: profile = Counter(profile.most_common(self.profile_size)) type_profiles = type_profiles.append({'type': output_column, 'name': name, 'profile': profile}, ignore_index=True) logging.info('Profiles for types made') self.types_grouped = type_profiles.groupby('type') # Group of all performers, group of all years,… return
def test_get_profile_two_sports_and_three_awards():
    expected = {
        'name': 'tim',
        'age': 36,
        'sports': ['basketball', 'tennis'],
        'awards': {
            'champ': 'helped out the team in crisis',
            'service': 'going the extra mile for our customers',
            'attitude': 'unbeatable positive + uplifting'
        }
    }
    assert get_profile('tim', 36, 'tennis', 'basketball',
                       service='going the extra mile for our customers',
                       champ='helped out the team in crisis',
                       attitude='unbeatable positive + uplifting') == expected
def wish_list(request):
    user_profile = profile.get_profile(request)
    products = user_profile.wish_list.all()
    title_head = "Tu lista de deseos"  # Spanish: "Your wish list"
    if products:
        products, order_by_form = order_products(request, products)
        num_x_pag, product_per_pag_form = get_num_x_pag(request)
        products, order_by_brand_form = filter_products(request, products)
        paginator, products_per_pag = get_paginator(request, products, num_x_pag)
        show_toolbar = True
    else:
        show_toolbar = False
        paginator, products_per_pag = get_paginator(request, [], 1)
    product_row = get_product_row(products_per_pag)
    return render_to_response("tags/product_list.html", locals(),
                              context_instance=RequestContext(request))
def main():
    profile_name = sys.argv[1]
    job_id = sys.argv[2]
    pf = profile.get_profile(profile_name, job_id)
    packet_sizes = [64, 1024, 4096, 16384]
    all_hosts = pf.get_hosts()
    result_base_path = '/projects/sciteam/jsb/ghosh1/latency/%s' % job_id
    for packet_size in packet_sizes:
        logger.debug('Running experiment for packet_size: %d' % packet_size)
        threads = []
        # Start one experiment thread per host, then wait for all of them
        for host in all_hosts:
            current_thread = RunExperimentThread(result_base_path, host,
                                                 packet_size, all_hosts)
            threads.append(current_thread)
            current_thread.start()
        for t in threads:
            t.join()
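# RunExperimentThread is not defined in this listing. A minimal sketch of its
# likely shape, assuming it measures latency from `host` to the other hosts at
# the given packet size; the constructor arguments come from main() above, but
# the run() body (ping over ssh) is an assumption for illustration:
import threading

class RunExperimentThread(threading.Thread):
    def __init__(self, result_base_path, host, packet_size, all_hosts):
        super(RunExperimentThread, self).__init__()
        self.result_base_path = result_base_path
        self.host = host
        self.packet_size = packet_size
        self.all_hosts = all_hosts

    def run(self):
        # e.g. ping every peer from self.host and append the results to a
        # per-host file under result_base_path
        for peer in self.all_hosts:
            if peer != self.host:
                os.system('ssh %s ping -c 10 -s %d %s >> %s/%s.txt' %
                          (self.host, self.packet_size, peer,
                           self.result_base_path, self.host))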
def learn(self, input_data_file): """Train learners using profiles of songs""" df = pd.read_csv(input_data_file, sep=';', index_col=0, names=self.column_names) logging.info('Making profiles for songs') df['profile'] = df.apply(lambda row: get_profile(pd.read_csv("unigram/" + str(row.name) + ".csv", index_col=0), self.N, self.ngram_type), axis=1) # Make profile for each song in input self.v = DictVectorizer(sparse=False) self.v.fit(df['profile']) training_input = self.v.transform(df['profile']) logging.info('Made profiles, now making and fitting learners') for output_name in self.output_names: training_output = np.array(df[output_name]) if output_name in ['Year', 'Tempo']: regressor = self.get_regressor(self.regressor_type, output_name, training_input.shape[1], 1, self.regressor_args) regressor.fit(np.float32(training_input), np.float32(training_output)) self.__learners.append({'output name': output_name, 'learner': regressor, 'type': 'regressor'}) else: label_encoder = LabelEncoder() labels = label_encoder.fit_transform(training_output) classifier = self.get_classifier(self.classifier_type, output_name, training_input.shape[1], len(np.unique(training_output)), self.classifier_args) classifier.fit(np.float32(training_input), np.int32(labels)) self.__learners.append({'ouput name': output_name, 'label encoder': label_encoder, 'learner': classifier, 'type': 'classifier'}) logging.info('Made and fit learners') return
def get_profile(id):
    serializer = ProfileSerializer(profile.get_profile(id))
    return serializer.data
def __get__(self, request, obj_type=None):
    # Set the cached profile if it doesn't already exist (that is, only query once):
    if not hasattr(request, '_cached_profile'):
        from profile import get_profile
        request._cached_profile = get_profile(request, self.app_id)
    return request._cached_profile
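# The __get__ above belongs to a lazy-loading descriptor set on the request
# class. A minimal sketch of the surrounding wiring; the class name
# LazyProfile and the middleware line are assumptions for illustration:
class LazyProfile(object):
    def __init__(self, app_id):
        self.app_id = app_id

    def __get__(self, request, obj_type=None):
        if not hasattr(request, '_cached_profile'):
            from profile import get_profile
            request._cached_profile = get_profile(request, self.app_id)
        return request._cached_profile

# e.g. in middleware: RequestClass.profile = LazyProfile(app_id); afterwards
# request.profile performs at most one profile lookup per request.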
def test_get_profile_no_age():
    with pytest.raises(TypeError):
        assert get_profile('tim')
def test_get_profile_one_sport():
    expected = {"name": "tim", "age": 36, "sports": ["tennis"]}
    assert get_profile("tim", 36, "tennis") == expected
def test_get_profile_no_name():
    with pytest.raises(TypeError):
        assert get_profile()
def test_get_profile_two_sports():
    expected = {"name": "tim", "age": 36, "sports": ["basketball", "tennis"]}
    assert get_profile("tim", 36, "tennis", "basketball") == expected
def test_mod_profile(self):
    profile.mod_profile(self.modprofile)
    prof = profile.get_profile(self.token)
    self.assertEqual(prof["name"], "altro")
def test_get_profile_dict():
    assert get_profile("tim", 36) == {"name": "tim", "age": 36}
def test_get_profile_valueerror():
    with pytest.raises(ValueError):
        assert get_profile("tim", "nonint")
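# The tests above pin down the behaviour of the get_profile() under test: name
# and age are required (TypeError otherwise), age must be an int (ValueError),
# at most five sports are allowed (ValueError), sports come back sorted, and
# keyword arguments are collected under 'awards'. A minimal sketch that would
# satisfy them; an assumption, since the real implementation is not part of
# this listing:
def get_profile(name, age, *sports, **awards):
    if not isinstance(age, int):
        raise ValueError('age must be an int')
    if len(sports) > 5:
        raise ValueError('no more than 5 sports allowed')
    profile = {'name': name, 'age': age}
    if sports:
        profile['sports'] = sorted(sports)
    if awards:
        profile['awards'] = dict(awards)
    return profile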
def classify(self, song_df): """Classifier checks for each value's profile of output_column which is most similar to the song's profile (by an argmax in each group of types_grouped)""" return {t: group.loc[np.argmax(group.apply(lambda row: similarity(row['profile'], get_profile(song_df, self.N, self.ngram_type)), axis=1))]['name'] for t, group in self.types_grouped}
def classify(self, song_df): """Classify a specific song""" test_input = np.float32(self.v.transform(get_profile(song_df, self.N, self.ngram_type))) return {learner['output name']: learner['label encoder'].inverse_transform(learner['learner'].predict(test_input)) if learner['type'] == 'classifier' else learner['learner'].predict(test_input) for learner in self.__learners}