def main3():
    """Debug driver: feed the fixed input "3#843" through the trainer,
    printing sequence/conditional probabilities for each prefix of the
    expected text "e the", then for the full text.
    """
    trainer = loadTrainer(False, 5)
    probSeq = Probability(-1,'', 1.0)
    probCon = Probability(-1,'', 1.0)
    io = IOHandler()
    b = Builder()
    inputStr = "3#843"
    text = "e the"
    tmp = ''
    i = 0
    for letter in inputStr:
        b.charSequenceProbablility(probSeq, letter, trainer)
        b.conditionalProbablility(probCon, letter, trainer)
        # Grow the expected-text prefix in lockstep with the input characters
        tmp += text[i]
        print("DEBUG Probability of word", tmp , "is ", probCon.getChild(tmp).getProbability(), probSeq.getChild(tmp).getProbability())
        #Check if input word is stil within range. If not we can not calculate
        #probabilites
        i += 1
        if(i >= trainer.depth):
            print("DEBUG: Input word is bigger then depth of training corpus. Exiting now!")
            break
    print("DEBUG Probability of word", text , "is ", probCon.getChild(text).getProbability(), probSeq.getChild(text).getProbability())
    return True
#main3()
def __init__(self):
    """Initialize recommender state; matrices and targets are filled in later."""
    self.b = Builder()                 # data-access helper
    self.URM_train = None              # training user-rating matrix
    self.test_df = None                # held-out test dataframe
    self.target_playlists = None       # playlists to recommend for
    self.target_tracks = None          # candidate tracks
    self.num_playlists_to_test = 10000
def recommend_rr(self):
    """Produce top-5 recommendations per target playlist by round-robin
    merging of three score vectors (content ICM, collaborative UCM, SLIM-BPR).

    Returns a pandas DataFrame with columns ['playlist_id', 'track_ids'];
    track ids are a list in test mode, a space-joined string otherwise.
    """
    builder = Builder()
    nontarget_indices = builder.get_nontarget_indices(self.target_tracks)

    # TF-IDF weighting is applied on the transpose of the URM, then
    # transposed back; only the SLIM contribution uses the weighted matrix.
    URM_T = self.URM.T
    URM_tfidf_T = feature_extraction.text.TfidfTransformer().fit_transform(URM_T)
    URM_tfidf = URM_tfidf_T.T
    URM_tfidf_csr = URM_tfidf.tocsr()

    dataframe_list = []

    print('Predicting round_robin with mode =', self.mode, '...', flush=True)

    for i in tqdm(range(0, self.num_playlist_to_recommend)):
        # Iterate over indices of target playlists
        index = builder.get_target_playlist_index(self.target_playlists[i])

        # Compute relevant indices for the prediction
        known_indices = np.nonzero(self.URM[index].toarray().flatten())

        # Calculate recommenders contributions; tracks already in the
        # playlist and non-target tracks are zeroed out of each score vector.
        icm_prediction = self.URM[index, :] * self.S_ICM
        icm_prediction_flatten = icm_prediction.toarray().flatten()
        icm_prediction_flatten[known_indices] = 0
        icm_prediction_flatten[nontarget_indices] = 0

        ucm_prediction = self.URM[index, :] * self.S_UCM
        ucm_prediction_flatten = ucm_prediction.toarray().flatten()
        ucm_prediction_flatten[known_indices] = 0
        ucm_prediction_flatten[nontarget_indices] = 0

        slimBPR_prediction = URM_tfidf_csr[index, :] * self.Slim
        slimBPR_prediction_flatten = slimBPR_prediction.toarray().flatten()
        slimBPR_prediction_flatten[known_indices] = 0
        slimBPR_prediction_flatten[nontarget_indices] = 0

        # Round Robin prediction
        top_5_indices = self.round_robin(icm_prediction_flatten, ucm_prediction_flatten,
                                         slimBPR_prediction_flatten, self.mode,
                                         self.a, self.b, self.c)
        top_5_tracks = builder.get_top_10_tracks_from_indices(top_5_indices)
        top_5_tracks_string = ' '.join([str(i) for i in top_5_tracks])

        # Create dataset
        if self.is_test:
            dataframe_list.append([self.target_playlists[i], top_5_tracks])
        else:
            dataframe_list.append([self.target_playlists[i], top_5_tracks_string])

    dataframe = pd.DataFrame(dataframe_list, columns=['playlist_id', 'track_ids'])

    return dataframe
def main():
    """Demo: build a shop with ten goods and two workers, sell a good each,
    adjust prices and print the resulting state."""
    shop_builder = Builder()
    goods = shop_builder.create_goods(10)
    shop = Seller(goods, "eldorado")

    vasya = Worker("Вася", salary=100, greedy_level=1.4)
    petya = Worker("Петя", salary=80, greedy_level=1.2)
    for worker in (petya, vasya):
        shop.add_worker(worker)

    vasya.sell_one_good(shop.goods_list)
    petya.sell_one_good(shop.goods_list)

    print(shop.get_workers_info())
    print(shop_builder.get_string_repr(shop.goods_list))

    petya.sale_prices(shop.goods_list, 0.8)
    vasya.upscale_prices(shop.goods_list)
    print(shop_builder.get_string_repr(shop.goods_list))
    input()
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    dataReader = Builder()

    URM_train = dataReader.get_URM_train()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    collaborative_algorithm_list = [HybridRec]

    from ParameterTuning.AbstractClassSearch import EvaluatorWrapper
    from Base.Evaluation.Evaluator import SequentialEvaluator

    # Validation evaluator (cutoff 5) doubles as the early-stopping evaluator;
    # the test evaluator reports cutoffs 5 and 10.
    evaluator_validation_earlystopping = SequentialEvaluator(URM_validation, cutoff_list=[5])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[5, 10])

    evaluator_validation = EvaluatorWrapper(evaluator_validation_earlystopping)
    evaluator_test = EvaluatorWrapper(evaluator_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize="MAP",
        evaluator_validation_earlystopping=evaluator_validation_earlystopping,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_root_path=output_root_path)

    # pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)

    # Sequential fallback: tune each recommender, logging failures instead of aborting
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()
def __init__(self, **kwargs):
    """Initialize via the Builder base class, then build the galaxy data
    bundle and cache its SSP, SFH and FLUX_ELINES tables."""
    Builder.__init__(self, **kwargs)
    self.galaxy = self.build_data_bundle()
    self.ssp_data = self.galaxy.file_SSP()
    self.sfh_data = self.galaxy.file_SFH()
    self.elines_data = self.galaxy.file_FLUX_ELINES()
def mf_als_rec(is_test):
    """Train a Spark MF-ALS implicit-feedback model on the train URM and
    either evaluate it or write a submission CSV.

    :param is_test: when True evaluate on the split, otherwise save predictions
    :return: the MAP score in test mode, 0 otherwise
    """
    print('*** Test MF-ALS Recommender ***')

    conf = SparkConf().setAppName("MF-ALS Rec").setMaster("local")
    sc = SparkContext(conf=conf)

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    # NOTE(review): UCM is computed but never used in this function
    UCM = b.get_UCM(ev.get_URM_train())
    target_playlists = ev.get_target_playlists()

    # Every non-zero URM entry becomes an implicit rating of 1
    urm_train_indices = ev.get_URM_train().nonzero()
    ratings_list = []
    print('Creating RDD of tuples')
    for index in tqdm(range(0, urm_train_indices[0].size)):
        ratings_list.append(Rating(urm_train_indices[0][index], urm_train_indices[1][index], 1))
    ratings = sc.parallelize(ratings_list)

    model = ALS.trainImplicit(ratings, rank=10, iterations=5, alpha=0.01)

    dataframe_list = []

    print('Predicting...', flush=True)
    # Keep only the predictions belonging to target playlists
    all_predictions = model.recommendProductsForUsers(10).filter(lambda r: r[0] in target_playlists)\
        .collect()

    for u in tqdm(all_predictions):
        prediction = []
        for i in u[1]:
            prediction.append(i.product)
        dataframe_list.append([u[0], prediction])

    # Sort rows by playlist id
    def get_id(e):
        return e[0]
    dataframe_list.sort(key=get_id)

    train_df = pd.DataFrame(dataframe_list, columns=['playlist_id', 'track_ids'])

    if is_test:
        map5 = ev.map5(train_df)
        # NOTE(review): label says "Hybrid MAP@10" but the value is ev.map5
        # of the MF-ALS model — confirm the intended label.
        print('Hybrid MAP@10:', map5)
        return map5
    else:
        print('Prediction saved!')
        train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19] + "/all/sub.csv", sep=',', index=False)
        return 0
def __init__(self, is_test=True):
    """Initialize evaluator/recommender state.

    :param is_test: test-mode flag, stored for later use by the recommend/
        evaluate methods
    """
    self.b = Builder()                    # data-access helper
    self.URM_train = None                 # filled in later
    self.test_df = None
    self.target_playlists = None
    self.target_tracks = None
    self.num_playlists_to_test = 10000
    self.is_test = is_test
    self.num_playlists_to_evaluate = 10000
def builder_for_base_image(self, template, parameters=None):
    """Create a Builder, start a base-image build from *template*, and
    register it under the base image's identifier (guarded by builders_lock)."""
    image_builder = Builder()
    image_builder.build_image_from_template(template, parameters=parameters)
    key = image_builder.base_image.identifier
    self.builders_lock.acquire()
    try:
        self.builders[key] = image_builder
    finally:
        self.builders_lock.release()
    return image_builder
def builder_for_provider_image(self, provider, credentials, target, image_id=None, template=None, parameters=None):
    """Create a Builder, start a provider-image build, and register it under
    the provider image's identifier (guarded by builders_lock)."""
    provider_builder = Builder()
    provider_builder.create_image_on_provider(provider, credentials, target,
                                              image_id, template, parameters)
    key = provider_builder.provider_image.identifier
    self.builders_lock.acquire()
    try:
        self.builders[key] = provider_builder
    finally:
        self.builders_lock.release()
    return provider_builder
def test_uptodate(self):
    """A rule whose target is already up to date is registered but not re-made."""
    builder = Builder()
    dst = self.getPath("y")
    builder.applyRule(dst, self.srcPaths, lambda: catFiles(dst, self.srcPaths))
    # assertEquals is a deprecated unittest alias; use assertEqual.
    self.assertEqual(1, countLines(dst))
    self.assertEqual(1, builder.numRules)
    self.assertEqual(0, builder.numMade)
def builder_for_target_image(self, target, image_id=None, template=None, parameters=None):
    """Create a Builder, start a target-image customization, and register it
    under the target image's identifier (guarded by builders_lock)."""
    target_builder = Builder()
    target_builder.customize_image_for_target(target, image_id, template, parameters)
    key = target_builder.target_image.identifier
    self.builders_lock.acquire()
    try:
        self.builders[key] = target_builder
    finally:
        self.builders_lock.release()
    return target_builder
def test_uptodate(self):
    """A rule whose target is already up to date is registered but not re-made."""
    builder = Builder()
    dst = self.getPath('y')
    builder.applyRule(dst, self.srcPaths, lambda: catFiles(dst, self.srcPaths))
    # assertEquals is a deprecated unittest alias; use assertEqual.
    self.assertEqual(1, countLines(dst))
    self.assertEqual(1, builder.numRules)
    self.assertEqual(0, builder.numMade)
def __init__(self, train_data, predict_on, n_trees=20, max_depth=5, predictors=None):
    """Build a forest of n_trees trees over train_data for the predict_on column.

    NOTE: Python 2 code (print statement, xrange).

    :param train_data: training table, indexable by column name
    :param predict_on: name of the target column
    :param n_trees: number of trees to build
    :param max_depth: maximum depth of each tree
    :param predictors: optional subset of predictor columns passed to Builder.build
    """
    from Builder import Builder
    self.train_data = train_data
    self.n_trees = n_trees
    self.max_depth = max_depth
    self.predict_on = predict_on
    self.trees = []
    # Mean of the target column — baseline prediction for the forest.
    self.avg = sum(train_data[predict_on])/float(len(train_data))
    b = Builder()
    for n in xrange(n_trees):
        print "starting tree " + str(n+1)
        self.trees.append(b.build(train_data, predict_on, max_depth, predictors))
        print "finished tree " + str(n+1)
def build(self):
    """Validate the chosen directory, build the classifier from it, and
    report success or failure through message boxes."""
    print("build")
    path = self.entry_dirPath.get()
    if self.checkInput(path):
        self.builder = Builder(path, self.bins)
        try:
            Builder.build(self.builder)
            if self.builder.trainingSet is None:
                tkMessageBox.showinfo("Bad input", "Bins isn't valid, Please try to another value of bins")
            else:
                tkMessageBox.showinfo("Building Done", "Building classifier using train-set is done!")
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt; only runtime errors should be caught.
            tkMessageBox.showinfo("Failed", "Something went wrong, please try again")
def test_build(self):
    """Two distinct targets are each built once and contain all source lines."""
    builder = Builder()
    dst = self.getPath("123")
    builder.applyRule(dst, self.srcPaths, lambda: catFiles(dst, self.srcPaths))
    # assert_ and assertEquals are deprecated unittest aliases;
    # use assertTrue / assertEqual.
    self.assertTrue(os.path.exists(dst))
    self.assertEqual(3, countLines(dst))
    dst = self.getPath("x")
    builder.applyRule(dst, self.srcPaths, lambda: catFiles(dst, self.srcPaths))
    self.assertEqual(3, countLines(dst))
    self.assertEqual(2, builder.numRules)
    self.assertEqual(2, builder.numMade)
def build_test_x(self):
    """Assemble the sparse test feature matrix self.test_x, one stacked row
    group per test playlist."""
    print("Build test_x...")
    b = Builder()
    playlists_indices = b.get_playlists_indices(self.test_playlists)
    # NOTE(review): tracks_indices is computed but never used here.
    tracks_indices = b.get_tracks_indices(self.test_tracks)
    # LIL format gives cheap per-row access while building.
    ICM_lil = self.ICM.tolil()
    URM_lil = self.URM.tolil()
    rows_list = []
    for p_i in tqdm(playlists_indices):
        # Column indices of the tracks already present in playlist p_i
        p_i_tracks = URM_lil[p_i].rows[0]
        self.build_rows(p_i, p_i_tracks, ICM_lil, rows_list, True)
    # Stack all the rows together
    self.test_x = sparse.vstack(rows_list)
def __init__(self, top_level_dir, repos, jobs=1, sudo_creds=None):
    """
    :param top_level_dir: the directory that holds all the cloned repositories
                          according to manifest
                          example: <top_level_dir>/on-http/...
                                                  /on-tftp/...
    :param repos: a list of repositories to be built
    :param jobs: number of parallel jobs (debian package builds) to run
    :param sudo_creds: the environment variable name of sudo credentials,
                       for example: SUDO_CRED=username:password
    :return: None
    """
    self.top_level_dir = top_level_dir
    self._repos = repos
    self._jobs = jobs
    self._sudo_creds = sudo_creds
    self._builder = Builder(self._jobs)
def main():
    """CLI entry point for the StarWeb builder: parse arguments, echo the
    configuration and either run the remote script or copy-and-build."""
    parser = argparse.ArgumentParser(description='Fator 7 StarWeb Builder.')
    parser.add_argument("absolute_path")
    parser.add_argument("--env", default="gisdesenv")
    parser.add_argument("--run", action="store_true")
    args = parser.parse_args()

    work_dir = os.environ['STARWEB_WORK_DIR']
    share_path = os.environ['STARWEB_SHARE_PATH']

    print("Local path.: {f}".format(f=args.absolute_path))
    print("Work dir...: {d}".format(d=work_dir))
    print("Remote.path: {p}".format(p=share_path))
    if args.run:
        print("Env........: {e}".format(e=args.env))
        print("Run........: {r}\n".format(r=args.run))
    else:
        print("Env........: {e}\n".format(e=args.env))

    builder = Builder(share_path, args.absolute_path, work_dir, args.env,
                      args.run, os.environ['STARWEB_USER'],
                      os.environ['STARWEB_PASSWORD'])

    if args.run:
        builder.run_script()
    else:
        builder.copy()
        builder.build()
def main():
    """Demo: create a shop with four workers, print a random worker's sales
    info, the goods listing, and the per-class instance counters."""
    build = Builder()
    go = build.create_goods(10)
    eldorado = Seller(go, "Eldorado")
    eldorado.add_worker(Worker("Vasya", salary=100, greedy_level=1.3))
    eldorado.add_worker(Worker("Petya", salary=200, greedy_level=1.8))
    eldorado.add_worker(Worker("Ivan", salary=150, greedy_level=1.1))
    eldorado.add_worker(Worker("Vasya", salary=560, greedy_level=1.4))
    print(eldorado.get_random_worker().get_sold_goods_info())
    print(build.get_string_repr(eldorado.goods_list))
    print("Amount of Worker class:", Worker.amount_of_workers)
    print("Amount of Good class:", Good.amount)
    print("Amount of Builder class:", Builder.amount)
    # BUG FIX: the Seller counter previously printed Builder.amount
    # (copy-paste from the line above).
    print("Amount of Seller class:", Seller.amount)
    input("PressEnter")
def main2():
    """ Read in a sentence and calculates its probability"""
    # The sentence probability is the product of conditional probabilities
    # of each depth-length character window.
    # NOTE(review): range(0, len(sentence) - depth) skips the final window;
    # len(sentence) - depth + 1 would cover every depth-length substring —
    # confirm whether this off-by-one is intentional.
    depth = 5
    trainer = loadTrainer(False, depth)
    b = Builder()
    sentence = "Did it ever rain in Steinfurt"
    probability = 1.0
    for i in range(0, len(sentence) - depth):
        word = ''
        probCon = Probability(-1, '', 1.0)
        for j in range(0, depth):
            # Build the window string while feeding its characters to the trainer
            word += sentence[i + j]
            b.conditionalProbablility(probCon, IOHandler().mapCharacter(sentence[i + j]), trainer)
        probability = probability * probCon.getChild(word).getProbability()
    print("DEBUG: Probablility of \"", sentence, "\" is ", str(probability))
    return True
def getResult(self):
    """Render every item from the Builder as one <list> XML document."""
    parts = [self.getPartElement(item.getItem()) for item in Builder().getItems()]
    return "<list>" + "".join(parts) + "</list>"
def makeGenerator(images):
    """Wire a Generator from an average-RGB classification of *images*, a
    stacked delta calculator (weighted RGB + HSV) and a 56x56 Builder."""
    rgb_classifier = AvgRGBClassifier()
    delta_calculator = (
        AvgDeltaCalculator()
        .addCalculator(WeightedRGBDeltaCalculator(), 255.0)
        .addCalculator(WeightedHSVDeltaCalculator())
    )
    tile_builder = Builder((56, 56))
    return Generator(rgb_classifier.classify(images), delta_calculator, tile_builder)
def __init__(self, func, testtype, learn_type='hPES', nperd=30, learn_rate=5e-5, supervision_ratio=0.5, oja=False, seed=None):
    """Configure a learning experiment.

    :param func: 'channel' or 'conv' — selects the target function and its
        dimensionality/run length
    :param testtype: 'full' or 'one' — selects train/test durations
    :param seed: RNG seed; a random one is generated when None
    :raises Exception: on an unsupported func or testtype
    """
    # Per-function configuration: (function, in_d, out_d, runlength)
    func_table = {
        'channel': (LearnBuilder.channel, 3, 3, 30.0),
        'conv': (LearnBuilder.convolution, 6, 3, 80.0),
    }
    if func not in func_table:
        raise Exception('Function %s not supported' % func)
    self.func, self.in_d, self.out_d, self.runlength = func_table[func]

    # Train/test durations depend on the test type (and, for 'full', the function)
    if testtype == 'full':
        if func == 'conv':
            self.train, self.test = 4.0, 5.0
        elif func == 'channel':
            self.train, self.test = 0.5, 2.0
    elif testtype == 'one':
        self.train = self.runlength
        self.test = 20.0
    else:
        raise Exception('Test type %s not supported' % testtype)

    self.testtype = testtype
    self.learn_type = learn_type
    self.nperd = nperd
    self.supervision_ratio = supervision_ratio
    self.oja = oja
    self.learn_rate = learn_rate

    # If no seed passed in, we'll generate one
    self.seed = random.randint(0, 0x7fffffff) if seed is None else seed

    Builder.__init__(self)
def fit(self, URM, target_playlists, target_tracks, num_playlist_to_recommend, ICM, k, knn, is_test):
    """Store prediction inputs and precompute the SVD-based item similarity.

    :param URM: user-rating matrix
    :param target_playlists: playlists to produce recommendations for
    :param target_tracks: candidate tracks
    :param num_playlist_to_recommend: how many target playlists to process
    :param ICM: item-content matrix fed to the SVD similarity
    :param k: passed to Builder.get_S_ICM_SVD — presumably the SVD rank; confirm
    :param knn: passed to Builder.get_S_ICM_SVD — presumably neighbourhood size; confirm
    :param is_test: test-mode flag, used later by recommend()
    """
    self.URM = URM
    self.target_playlists = target_playlists
    self.target_tracks = target_tracks
    self.num_playlist_to_recommend = num_playlist_to_recommend
    self.ICM = ICM
    self.is_test = is_test
    self.S_ICM_SVD = Builder().get_S_ICM_SVD(self.ICM, k, knn)
def collaborative_filtering(is_test):
    """Evaluate (or export) the user-based collaborative-filtering recommender.

    :param is_test: when True compute MAP@5 on the split, otherwise save a CSV
    """
    print('*** Test Collaborative Filtering Recommender ***')
    b = Builder()
    ev = Evaluator()
    ev.split()
    rec = CollaborativeFilteringRec.CollaborativeFilteringRec()
    S_UCM = b.get_S_UCM_KNN(b.get_UCM(b.get_URM()), 500)
    # BUG FIX: is_test was hard-coded to True here, so submission runs still
    # produced test-format output (the sibling tester passes is_test through).
    rec.fit(ev.get_URM_train(), ev.get_target_playlists(), ev.get_target_tracks(),
            ev.num_playlists_to_test, S_UCM, is_test)
    train_df = rec.recommend()
    if is_test:
        map5 = ev.map5(train_df)
        print('CollaborativeFiltering MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('CollaborativeFiltering.csv', sep=',', index=False)
def item_based(is_test):
    """Evaluate (or export) the item-based content recommender (ICM kNN, k=250)."""
    print('*** Item Based Recommender ***')
    builder = Builder()
    evaluator = Evaluator()
    evaluator.split()

    recommender = ItemBasedRec.ItemBasedRec()
    item_similarity = builder.build_S_ICM_knn(builder.build_ICM(), 250)
    recommender.fit(evaluator.get_URM_train(), evaluator.get_target_playlists(),
                    evaluator.get_target_tracks(), evaluator.num_playlists_to_test,
                    item_similarity, is_test)

    predictions = recommender.recommend()
    if is_test:
        print('ItemBased MAP@5:', evaluator.map5(predictions))
    else:
        print('Prediction saved!')
        predictions.to_csv('ItemBased.csv', sep=',', index=False)
class WeatherSkill:
    """Voice-assistant skill answering weather questions via OpenWeatherMap,
    replying with Russian-language sentences."""

    api = OWMApi()      # shared OpenWeatherMap client
    profile = None

    @staticmethod
    @IntentBuilder(Builder("weather"))
    def handle_current_weather(string: str):
        """Handle a plain "weather" intent, e.g. "Weather in Paris".

        :return: a sentence with the current conditions and temperature
        """
        forecast = WeatherSkill.get_forecast(string)
        parsed = Forecast().parse(forecast)
        return "Сейчас за окном %s, температура %.1f градусов" % (
            parsed.detailed_status, parsed.temp)

    @staticmethod
    @IntentBuilder(Builder("forecast").require("weather"))
    def handle_forecast(string: str):
        """Handle a "forecast" intent; replies with the predicted conditions."""
        forecast = WeatherSkill.get_forecast(string)
        parsed = Forecast().parse(forecast)
        return "По прогнозу будет %s, температура %.1f" % (
            parsed.detailed_status, parsed.temp)

    @staticmethod
    @IntentBuilder(Builder("weather").require("will"))
    def handle_weather_will(string: str):
        """Handle a future-tense weather question ("weather" + "will")."""
        forecast = WeatherSkill.get_forecast(string)
        parsed = Forecast().parse(forecast)
        return "По прогнозу будет %s, температура %.1f" % (
            parsed.detailed_status, parsed.temp)

    @staticmethod
    def get_forecast(string: str) -> dict:
        """Extract the city (default "Moscow,ru") and datetime from the
        utterance and query the OWM forecast API."""
        city = extract_city(string)
        if city is None:
            city = "Moscow,ru"
        date = extract_datetime(string)
        forecast = WeatherSkill.api.weather_forecast(date, city)
        return forecast
def recommend(self):
    """Produce top-5 SLIM recommendations per target playlist.

    Returns a pandas DataFrame ['playlist_id', 'track_ids']; track ids are a
    list in test mode, a space-joined string otherwise.
    """
    b = Builder()
    nontarget_indices = b.get_nontarget_indices(self.target_tracks)

    dataframe_list = []

    # BUG FIX: owner_indices was referenced below without ever being
    # assigned, raising NameError on the first iteration. No owner
    # filtering is applied here (the sibling recommender passes []).
    owner_indices = []

    print('Predicting...', flush=True)

    for i in tqdm(range(0, self.num_playlist_to_recommend)):
        # Iterate over indices of target playlists
        index = b.get_target_playlist_index(self.target_playlists[i])

        # Compute relevant indices for the prediction
        known_indices = np.nonzero(self.URM[index].toarray().flatten())

        URM_row = self.URM[index, :] * self.Slim

        # Make prediction
        URM_row_flatten = URM_row.toarray().flatten()
        top_5_indices = b.get_top_10_indices(URM_row_flatten, nontarget_indices,
                                             known_indices, owner_indices)
        top_5_tracks = b.get_top_10_tracks_from_indices(top_5_indices)
        top_5_tracks_string = ' '.join([str(i) for i in top_5_tracks])

        # Create dataset
        if self.is_test:
            dataframe_list.append([self.target_playlists[i], top_5_tracks])
        else:
            dataframe_list.append([self.target_playlists[i], top_5_tracks_string])

    dataframe = pd.DataFrame(dataframe_list, columns=['playlist_id', 'track_ids'])

    return dataframe
def recommend(self):
    """Produce top-10 recommendations per target playlist from the fitted
    matrix-factorization model; returns a DataFrame ['playlist_id', 'track_ids']."""
    # Compute the indices of the non-target playlists
    b = Builder()
    nontarget_indices = b.get_nontarget_indices(self.target_tracks)

    # Initialize the dataframe
    dataframe_list = []

    # Apply tfidf on the traspose of URM
    print('Predicting...', flush=True)

    for i in tqdm(range(0, self.num_playlist_to_recommend)):
        # Iterate over indices of target playlists
        index = b.get_target_playlist_index(self.target_playlists[i])

        # Compute the indices of the known tracks
        known_indices = np.nonzero(self.URM[index].toarray().flatten())

        # Make top-10 prediction
        URM_row_flatten = self.MfRec.predict(index).toarray().flatten()
        top_10_indices = b.get_top_10_indices(URM_row_flatten, nontarget_indices, known_indices, [])
        top_10_tracks = b.get_top_10_tracks_from_indices(top_10_indices)
        top_10_tracks_string = ' '.join([str(i) for i in top_10_tracks])

        # Create dataset
        if self.is_test:
            dataframe_list.append([self.target_playlists[i], top_10_tracks])
        else:
            dataframe_list.append([self.target_playlists[i], top_10_tracks_string])

    dataframe = pd.DataFrame(dataframe_list, columns=['playlist_id', 'track_ids'])

    return dataframe
def train(self):
    """AdaBoost-style training: build self.numTrees boosted decision trees
    over self.eventSample (entries are [features, weight, label]),
    reweighting events after each tree, then print the training accuracy."""
    # Normalize the weights of the event sample
    sumWeights = 0
    for iSample in range(len(self.eventSample)):
        sumWeights += self.eventSample[iSample][1]
    for iSample in range(len(self.eventSample)):
        self.eventSample[iSample][1] /= sumWeights

    builder = Builder();

    # Build the forest
    for iTree in range(self.numTrees):
        print("Tree:", iTree)

        # Boost the event weights
        if iTree!=0:
            for iSample in range(len(self.eventSample)):
                out = self.trees[iTree-1].evaluate(self.eventSample[iSample][0])
                # Up-weight events the previous tree misclassified
                self.eventSample[iSample][1] *= np.exp(-out*self.treeWeights[iTree-1]*self.eventSample[iSample][2])

        # Train tree
        tree = builder.build(self.eventSample, self.treeDepth, self.numCuts, self.minNodeSize)
        self.trees.append(tree)

        # Set tree weight
        if iTree==0:
            self.treeWeights.append(1)
        else:
            # Weighted misclassification rate of the new tree
            err = 0
            for iSample in range(len(self.eventSample)):
                out = self.trees[iTree].evaluate(self.eventSample[iSample][0])
                if out!=self.eventSample[iSample][2]:
                    err += self.eventSample[iSample][1]
            # Classic AdaBoost tree weight: 0.5 * ln((1 - err) / err)
            self.treeWeights.append(0.5*np.log((1.0-err)/err))

    # Test trained trees
    numTrue = 0
    numFalse = 0
    for iSample in range(len(self.eventSample)):
        out = self.evaluate(self.eventSample[iSample][0])
        if out==self.eventSample[iSample][2]:
            numTrue += 1
        else:
            numFalse += 1
    print("Eff:", numTrue/(numTrue+numFalse))
def main():
    """Demo: populate a shop with Toshiba goods and four workers, run a few
    sales operations and dump the per-class instance counters."""
    factory = Builder("Toshiba")
    shop = Seller("ELDORADO", factory.create_goods(10))

    staff = [
        Worker("Egor", salary=100, greedy_level=1.3),
        Worker("Petya", salary=200, greedy_level=1.8),
        Worker("Ivan", salary=150, greedy_level=1.1),
        Worker("Vasya", salary=560, greedy_level=1.4),
    ]
    for person in staff:
        shop.add_worker(person)

    print(shop.find_good_by_id(2))
    shop.get_random_worker().sell_one_good(shop.GoodsList)
    print("BestWorker", shop.find_best_worker())
    shop.get_random_worker().upscale_prices(shop.GoodsList)
    print(shop.get_random_worker().get_sold_goods_info())
    print(factory.get_string_repr(shop.GoodsList))

    print("Amount of Worker class:", Worker.get_amount())
    print("Amount of Good class:", Good.get_amount())
    print("Amount of Builder class:", Builder.get_amount())
    print("Amount of Seller class:", Seller.get_amount())
    input("PressEnter")
def hybrid_rec(is_test):
    """Evaluate (or export) the hybrid recommender combining content (ICM),
    collaborative (UCM) and SLIM-BPR similarities.

    :param is_test: when True compute MAP on the split and return it,
        otherwise write the submission CSV and return 0
    """
    print('*** Test Hybrid Recommender ***')

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = HybridRec.HybridRec()

    S_UCM = b.get_S_UCM_KNN(b.get_UCM(ev.get_URM_train()), 600)
    S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250)
    Slim = SlimBPR.SlimBPR(ev.get_URM_train(),
                           epochs=1,
                           learning_rate=0.01,
                           positive_item_regularization=1,
                           negative_item_regularization=1).get_S_SLIM_BPR(500)

    # alfa blends (content+collab) against SLIM-BPR; avg blends content vs
    # collaborative — see the tuning notes below.
    rec.fit(ev.get_URM_train(), ev.get_target_playlists(), ev.get_target_tracks(),
            ev.num_playlists_to_test, S_ICM, S_UCM, Slim, is_test,
            alfa=0.3, avg=0.3)

    """ 0.30, 0.30
    alfa*((1-avg)*collab + avg*content) + (1-alfa)*slimBPR

    only collab con knn=500 0.09080017548893707
                    knn=600 0.09085745115462485
    only content    knn=250 0.05537121844924659
                    knn=300 0.055101704695727706
    only slim con   lr=0.01 epoch=1 0.09087007071213243
                    lr=0.001 epoch=8 0.09346656108877179
    content+collab con avg=0.20 0.
                       avg=0.30 0.09762916809334841
    all together con alfa=0.40 0.10715025718387602
                     alfa=0.30 0.1082252839472891 """

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('Hybrid MAP@10:', map5)
        return map5
    else:
        print('Prediction saved!')
        train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19] + "/all/sub.csv", sep=',', index=False)
        return 0
def recommend_avg_similarity(self, avg, beta): b = Builder() #2nb = NewBuilder() nontarget_indices = b.get_nontarget_indices(self.target_tracks) URM_T = self.URM.T URM_tfidf_T = feature_extraction.text.TfidfTransformer().fit_transform( URM_T) URM_tfidf = URM_tfidf_T.T URM_tfidf_csr = URM_tfidf.tocsr() dataframe_list = [] # Weighted average of S_ICM and S_UCM S_avg = (avg * self.S_ICM) + ((1 - avg) * self.S_UCM) print('Predicting avg_similarity...', flush=True) for i in tqdm(range(0, self.num_playlist_to_recommend)): # Iterate over indices of target playlists index = b.get_target_playlist_index(self.target_playlists[i]) # Compute relevant indices for the prediction known_indices = np.nonzero(self.URM[index].toarray().flatten()) #owner_tracks = nb.get_tracks_from_playlist_owner(self.target_playlists[i]) #owner_indices = nb.get_tracks_indices(owner_tracks) owner_indices = [] # Calculate recommenders contributions avg_prediction = URM_tfidf_csr[index, :] * S_avg #avg_prediction = normalize(avg_prediction, axis=1, norm='l2') #slimBPR_prediction = URM_tfidf_csr[index, :] * self.Slim #slimBPR_prediction = normalize(slimBPR_prediction, axis=1, norm='l2') # Weighted average of recommendations URM_row = (beta * avg_prediction ) #+ ((1-beta) * slimBPR_prediction) # Make prediction URM_row_flatten = URM_row.toarray().flatten() top_5_indices = b.get_top_5_indices(URM_row_flatten, nontarget_indices, known_indices, owner_indices) top_5_tracks = b.get_top_5_tracks_from_indices(top_5_indices) top_5_tracks_string = ' '.join([str(i) for i in top_5_tracks]) # Create dataset if self.is_test: dataframe_list.append([self.target_playlists[i], top_5_tracks]) else: dataframe_list.append( [self.target_playlists[i], top_5_tracks_string]) dataframe = pd.DataFrame(dataframe_list, columns=['playlist_id', 'track_ids']) return dataframe
def collaborative_filtering(is_test):
    """Evaluate (or export) the user-based collaborative-filtering recommender
    (UCM kNN, k=500)."""
    print('*** Test Collaborative Filtering Recommender ***')
    builder = Builder()
    evaluator = Evaluator(is_test=is_test)
    evaluator.split()

    recommender = CollaborativeFilteringRec.CollaborativeFilteringRec()
    ucm_similarity = builder.get_S_UCM_KNN(builder.get_UCM(evaluator.get_URM_train()), 500)
    recommender.fit(evaluator.get_URM_train(), evaluator.get_target_playlists(),
                    evaluator.get_target_tracks(), evaluator.num_playlists_to_test,
                    ucm_similarity, is_test)

    predictions = recommender.recommend()
    if is_test:
        print('CollaborativeFiltering MAP@5:', evaluator.map5(predictions))
    else:
        print('Prediction saved!')
        predictions.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19] + "/all/subCollab.csv",
                           sep=',', index=False)
def getResult(self):
    """Render every item from the Builder as one JSON object string,
    comma-separating the per-item fragments."""
    items = Builder().getItems()
    body = ",".join(self.getPartElement(item.getItem()) for item in items)
    return "{" + body + "}"
def main():
    """End-to-end demo of the shop model: create goods and workers, sell
    goods, adjust prices, fire the worst consultant and print statistics."""
    build = Builder("Toshiba")
    amount_of_goods = 15
    splitter = 60                     # width of the "=" divider lines
    goods = build.create_goods(amount_of_goods)
    eldorado = Seller("Eldorado")
    eldorado.add_goods(goods)
    print(build.get_string_repr(eldorado.GoodsList), "\n", "=" * splitter)
    cons_petya = WorkerConsultant("Petya", salary=splitter, greedy_level=1.1)
    cons_vasya = WorkerConsultant("Vasya", salary=200, greedy_level=1.3)
    cons_andrea = WorkerConsultant("Andrea", salary=150, greedy_level=1.5)
    man_victor = WorkerManager("Victor", salary=1000)
    eldorado.add_worker(cons_petya)
    eldorado.add_worker(cons_vasya)
    eldorado.add_worker(cons_andrea)
    eldorado.add_worker(man_victor)
    # Run several sale rounds with random consultants, with a price bump in between
    for i in range(len(eldorado.GoodsList) - 10):
        eldorado.get_random_consultant().sell_one_good(eldorado.GoodsList)
    print("=" * splitter)
    eldorado.get_random_consultant().upscale_prices(eldorado.GoodsList)
    print("=" * splitter)
    for i in range(len(eldorado.GoodsList) - 5):
        eldorado.get_random_consultant().sell_one_good(eldorado.GoodsList)
    print(eldorado.get_workers_info())
    print("=" * splitter)
    # The manager fires the weakest consultant
    man_victor.fire_worker(eldorado.find_worst_consultant(), eldorado.WorkerList)
    print("=" * splitter)
    print("\n", eldorado.get_workers_info(), sep="")
    print(eldorado.get_random_consultant().get_sold_goods_info())
    print("Highest salary", eldorado.get_highest_salary_worker().get_info())
    print("=" * splitter)
    # Look up a random good by id
    search_id = randint(0, amount_of_goods - 1)
    print("Search Results of id {0}: {1}".format(search_id, eldorado.find_good_by_id(search_id)))
    print("=" * splitter)
    print(build.get_string_repr(eldorado.GoodsList))
    print("=" * splitter)
    print("Cost of all goods in", eldorado, eldorado.calculate_prices())
    print("=" * splitter)
    print("Amount of WorkerConsultant class:", WorkerConsultant.get_amount())
    print("Amount of WorkerManager class:", WorkerManager.get_amount())
    print("Amount of Good class:", Good.get_amount())
    print("Amount of Builder class:", Builder.get_amount())
    print("Amount of Seller class:", Seller.get_amount())
    input("PressEnter")
def SVD(is_test):
    """Evaluate (or export) the SVD-based recommender (rank 10, kNN 250)."""
    print('*** Test SVD Recommender ***')
    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = SVDRec.SVDRec()
    rec.fit(ev.get_URM_train(), ev.get_target_playlists(), ev.get_target_tracks(),
            ev.num_playlists_to_test, b.build_ICM(), k=10, knn=250, is_test=is_test)
    train_df = rec.recommend()
    if is_test:
        map5 = ev.map5(train_df)
        # BUG FIX: the label and output filename said "SlimBPR" — a
        # copy-paste leftover from the SlimBPR tester.
        print('SVD MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('SVD.csv', sep=',', index=False)
def main():
    """Interactive loop: read inputs from IOHandler and accumulate sequence
    and conditional probabilities until input ends (getInput returns False)
    or the trainer depth is exceeded."""
    trainer = loadTrainer(False, 5)
    probSeq = Probability(-1,'', 1.0)
    probCon = Probability(-1,'', 1.0)
    io = IOHandler()
    b = Builder()
    inputStr = io.getInput()
    i = 0
    while(inputStr != False):
        b.charSequenceProbablility(probSeq, inputStr, trainer)
        b.conditionalProbablility(probCon, inputStr, trainer)
        #Check if input word is stil within range. If not we can not calculate
        #probabilites
        i += 1
        if(i >= trainer.depth):
            print("DEBUG: Input word is bigger then depth of training corpus. Exiting now!")
            break
        inputStr = io.getInput()
    return True
def run(self, name, logdir):
    """Build the network if needed, attach a CSV log node, run the simulation
    for the configured duration and detach the logger.

    :param name: experiment name used for the parameter file / CSV log
    :param logdir: directory receiving the parameter file and log
    """
    import nef
    if self.net is None:
        self.make()
    # Persist the run parameters; the returned name keys the CSV log file.
    fn = Builder.write_param_file(name, self.params, logdir)
    lognode = nef.Log(self.net, "log", dir=logdir, filename='%s.csv' % fn, interval=0.01)
    lognode.add('switch', origin='origin', tau=0.0)
    lognode.add('actual error')
    # Total simulation length depends on the test type; assumes testtype is
    # 'full' or 'one' (anything else would leave `length` unbound).
    if self.testtype == 'full':
        length = LearnBuilder.get_full_length(self.runlength, self.train, self.test)
    elif self.testtype == 'one':
        length = self.train + self.test
    self.net.network.run(0, length)
    self.net.network.removeStepListener(lognode)
def builder_for_base_image(self, template, parameters=None):
    """Create a Builder, start a base-image build from *template*, and
    register it in self.builders under the base image's identifier.

    :param template: image template to build from
    :param parameters: optional build parameters forwarded to the Builder
    :return: the registered Builder
    """
    builder = Builder()
    # BUG FIX: the `parameters` argument was accepted but never forwarded
    # (the locked variant of this method does forward it).
    builder.build_image_from_template(template, parameters=parameters)
    self.builders[builder.base_image.identifier] = builder
    return builder
def builder_for_target_image(self, target, image_id=None, template=None, parameters=None):
    """Create a Builder, start a target-image customization, and register it
    under the target image's identifier."""
    target_builder = Builder()
    target_builder.customize_image_for_target(target, image_id, template, parameters)
    key = target_builder.target_image.identifier
    self.builders[key] = target_builder
    return target_builder
def __init__(self, name = 'GNU makefile for g++/gcc', buildTool = 'make', buildToolOptions = '-f'):
    """Builder specialisation preconfigured to drive GNU make (`make -f`).

    :param name: human-readable builder name
    :param buildTool: build executable to invoke
    :param buildToolOptions: options passed to the build tool
    """
    Builder.__init__(self, name, buildTool, buildToolOptions)
def _splittag(self, path):
    """Split *path* via the base Builder, normalising the attributes into a
    list of (key, value) tuples."""
    tag, attribs = Builder._splittag(self, path)
    return tag, [(key, value) for key, value in attribs]
def __init__(self, d=None):
    """Initialize via the Builder base class with an optional mapping.

    BUG FIX: the signature previously used a mutable default (`d={}`);
    that single dict object is shared across every call that omits `d`,
    so mutations by Builder.__init__ would leak between instances.
    A fresh empty dict is now created per call.
    """
    Builder.__init__(self, {} if d is None else d)
def builder_for_provider_image(self, provider, credentials, target, image_id=None, template=None, parameters=None):
    """Create a Builder, start a provider-image build, and register it under
    the provider image's identifier."""
    provider_builder = Builder()
    provider_builder.create_image_on_provider(provider, credentials, target,
                                              image_id, template, parameters)
    key = provider_builder.provider_image.identifier
    self.builders[key] = provider_builder
    return provider_builder
class DebianBuilder(object):
    """
    This is a class that builds the debian packages.
    It assumes that the repository is cloned successfully
    and is accessible for the tool.

    NOTE: Python 2 code (print statement, `except Exception, e` syntax).
    """

    def __init__(self, top_level_dir, repos, jobs=1, sudo_creds=None):
        """
        :param top_level_dir: the directory that holds all the cloned repositories
                              according to manifest
                              example: <top_level_dir>/on-http/...
                                                      /on-tftp/...
        :param repos: a list of repositories to be built
        :param jobs: Number of parallel jobs(build debian packages) to run.
        :param sudo_creds: the environment variable name of sudo credentials.
                           for example: SUDO_CRED=username:password
        :return: None
        """
        self.top_level_dir = top_level_dir
        self._repos = repos
        self._jobs = jobs
        self._sudo_creds = sudo_creds
        self._builder = Builder(self._jobs)

    @property
    def top_level_dir(self):
        # Absolute path of the directory holding the cloned repositories.
        return self._top_level_dir

    @top_level_dir.setter
    def top_level_dir(self, top_level_dir):
        """
        Setter for the repository directory; validates that the path exists
        and stores its absolute form.

        :param top_level_dir: the directory that holds all the cloned repositories
                              according to manifest
                              example: <top_level_dir>/on-http/...
                                                      /on-tftp/...
        :return: None
        :raises ValueError: if the path is not an existing directory
        """
        if os.path.isdir(top_level_dir):
            self._top_level_dir = os.path.abspath(top_level_dir)
        else:
            raise ValueError("The path provided '{dir}' is not a directory."
                             .format(dir=top_level_dir))

    def generate_tasks(self):
        """
        Generate a list of tasks to be performed.

        An example of task:
        {
            'name': repo,
            'data': {
                'commands': [command1, ...],  # command1 is an instance of BuildCommand
                'env_file': on-http.version
            }
        }

        :raises ValueError: if a requested repository is missing on disk
        """
        tasks = []
        for repo in self._repos:
            task = {
                'name': repo,
                'data': {
                    'commands': [],
                    'env_file': None
                }
            }
            command_name = './HWIMO-BUILD'
            path = os.path.abspath(os.path.join(self._top_level_dir, repo))
            if not os.path.exists(path):
                raise ValueError("Repository {0} doesn't exist under {1}"
                                 .format(repo, self._top_level_dir))
            command = BuildCommand(command_name, path)
            # on-imagebuilder needs elevated privileges when credentials are given
            if repo == "on-imagebuilder" and self._sudo_creds:
                command.use_sudo = True
                command.sudo_creds = self._sudo_creds
            task['data']['commands'].append(command)
            # Attach <repo>.version as the task's environment file when present
            version_file = "{0}.version".format(repo)
            version_path = os.path.abspath(os.path.join(path, version_file))
            if os.path.exists(version_path):
                task['data']['env_file'] = version_path
            print "[Info] Execute command {0} for repo {1}.".format(
                command_name, repo
            )
            tasks.append(task)
        return tasks

    def blind_build_all(self):
        """
        Iterate through the first layer subdirectory of top_level_dir
        and if found HWIMO-BUILD, then execute the script.

        :raises RuntimeError: wrapping any failure during task generation or build
        """
        try:
            tasks = self.generate_tasks()
            for task in tasks:
                self._builder.add_task(task['data'], task['name'])
            self._builder.finish()
        except Exception, e:
            raise RuntimeError("Failed to build all debian packages due to \n{0}".format(e))
import os.path
import sys

# I make a symlink `trevor` in the nengo directory, pointing to scriptdir
sys.path.append('trevor/nengo')

from Builder import Builder
from LearnBuilder import LearnBuilder

scriptdir = os.path.expanduser("~/nengo-latest/trevor/nengo")
logdir = os.path.expanduser("~/Programming/cogsci2013/results/")

# Flip this branch to True to preview the 'channel' network interactively
# instead of running an experiment from the command line.
if False:
    builder = LearnBuilder('channel')
    builder.view(True)
else:
    # CLI: <script> <name> <testtype> [key=value params...]
    name = sys.argv[1]
    testtype = sys.argv[2]
    params = Builder.parse_params(sys.argv[3:])
    # Results are split by test type into separate result directories
    if testtype == 'full':
        logdir = logdir + "functions-test"
    elif testtype == 'one':
        logdir = logdir + "functions-optimize"
    builder = LearnBuilder(name, testtype, **params)
    builder.run(name, logdir)
from Builder import Builder

# Build configuration for the minified JSONPageBuilder JS bundle.
copyright = '(C) UtgDev'          # banner embedded in the output
minified = 'JSONPageBuilder'      # name of the minified artifact
# Source folders, concatenated in this order
jsfolder = ['/js/libs/jquery', '/js/libs', '/js/libs/bootstrap', '/js/libs/moment','/js/libs/moment/extensions','/js/helpers','/js/fixtures','/js/pagebuilder','/js/pagebuilder/extensions', '/js']
removeTemp = False                # keep intermediate files
debug = True                      # verbose build output

builder = Builder(copyright, minified, jsfolder, removeTemp, debug)
builder.build()