parser.add_argument("--json_data", default='../data/EventDump_10Ktracks.json', type=str, help="Json data path") parser.add_argument("--npy_data", default='../data/ET_muons_10K_0000.npy', type=str, help="NPY data") args = parser.parse_args() np.random.seed(1234) data = np.load(args.npy_data) BD = BatchNpyData2(data) config = TestConfig() m = testrnn(config) data, filtered_data, rand_int = BD.sample_batch(m.config.MaxNumSteps, 1000) data, max_data = pre_process(data) filtered_data, _ = pre_process(filtered_data, max_data) test, filtered_test_data, rand_int = BD.sample_batch( m.config.MaxNumSteps, m.config.batch_size) test, _ = pre_process(test, max_data) filtered_test_data, _ = pre_process(filtered_test_data, max_data) cost_lst = [] tf_config = tf.ConfigProto() tf_config.gpu_options.allow_growth = True with tf.Session(config=tf_config) as sess: tf.set_random_seed(1234) summary_writer = tf.train.SummaryWriter('Logs/') sess.run(tf.initialize_all_variables()) for ii in range(args.niter): m.assign_lr(sess, config.learning_rate) #ind,data = m.generate_data(m.config.batch_size,2)
rank_improv_1_MAP = MAP()
rank_improv_2_MAP = MAP()

for file in files:
    if file == '.DS_Store':  # Ignore the macOS metadata file
        continue
    source_file = open(TeMario_originals + file, 'r', encoding='iso-8859-1')
    target_file = open(TeMario_summaries + 'Ext-' + file, 'r', encoding='iso-8859-1')
    source = pre_process(source_file.read())
    target = pre_process(target_file.read())
    source_file.close()
    target_file.close()

    sents = filter_list(sent_tokenize(source))
    vectorizer = CustomVectorizer()
    vectorizer.fit(sents)
    vecs = vectorizer.transform_tfidf(sents)
    source_score = vectorizer.transform_tfidf([source])[0]

    graph = defaultdict(lambda: [])
if __name__ == "__main__": parser = argparse.ArgumentParser(description="Arguments for fitting") parser.add_argument("--niter",default=10,type=int,help="Number of iterations") parser.add_argument("--json_data",default='../data/EventDump_10Ktracks.json',type=str,help="Json data path") parser.add_argument("--npy_data",default='../data/ET_muons_10K_0000.npy',type=str,help="NPY data") args = parser.parse_args() rnd = np.random.RandomState(0) data = np.load(args.npy_data) BD= BatchNpyData(data) n_states = 5 n_dim_obs = 3 MaxNumSteps = 10 batch_size = 200 test, rand_int = BD.sample_batch(MaxNumSteps,batch_size) test, _ = pre_process(test) # create a Kalman Filter by hinting at the size of the state and observation # space. If you already have good guesses for the initial parameters, put them # in here. The Kalman Filter will try to learn the values of all variables. #kf = KalmanFilter(transition_matrices=np.array([[1, 1], [0, 1]]), # transition_covariance=0.01 * np.eye(2), # em_vars=['transition_matrices','transition_covariance', # 'observation_matrices', 'observation_covariance', # 'observation_offsets','transition_offsets']) trans_mat_init = np.array([[1,0,init(),init(),init()], [0,1,init(),init(),init()], [init(),init(),1,0,0], [init(),init(),0,1,0], [init(),init(),0,0,1]]) observation_mat = np.array([[1,0,0,0,0],
            break
    rank = {k: round(rank[k], 6) for k in rank.keys()}  # the rounding precision influences convergence testing
    return rank, i


if __name__ == '__main__':
    print('\nTesting adapted PageRank algorithm for sentence ranking and consequent text summarization.\n' +
          'A graph is built linking sentences with similarity bigger than a certain threshold.\n' +
          'This method is tested and evaluated on the "catalunha.txt" file, with a 0.1 threshold.\n')

    file = open('catalunha.txt', encoding='utf-8')
    source = pre_process(file.read())
    sents = filter_list(sent_tokenize(source))
    file.close()

    vectorizer = CustomVectorizer(stopwords=stopwords.words())
    vectorizer.fit(sents)  # -> fit on sentences or on whole text?
    vecs = vectorizer.transform_tfidf(sents)

    graph = {i: [] for i in range(len(vecs))}
    threshold = 0.1
    for i in range(len(vecs)):
        for j in range(i+1, len(vecs)):
            if similarity(vecs[i], vecs[j]) > threshold:
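# --- Illustrative sketch (not part of the original script) -------------------
# `similarity` is called above but not defined in this excerpt; a common choice
# for comparing tf-idf sentence vectors is cosine similarity. The function
# below is a minimal numpy-based sketch under that assumption; the name
# `cosine_similarity_sketch` is illustrative only.
import numpy as np

def cosine_similarity_sketch(u, v):
    """Cosine similarity between two dense vectors; returns 0.0 if either is a zero vector."""
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    return float(np.dot(u, v) / denom) if denom > 0 else 0.0

# Example: parallel vectors score 1.0, orthogonal vectors 0.0.
# cosine_similarity_sketch(np.array([1.0, 2.0]), np.array([2.0, 4.0]))  # -> 1.0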