def summarize_text(file_path, output_folder, test_type, stemming, lower,
                   idf_type, tf_type, d, intersent_threshold, summary_threshold,
                   epsilon, mle_lambda, k, min_sent_len, include_narrative,
                   bias_formula, intersent_formula, info_order_type,
                   num_permutations, remove_header, remove_parens, remove_quotes,
                   remove_appos, remove_advcl, remove_relcl, remove_acl):
    """
    Creates extractive summaries (<= 100 words) of multi-document news sets
    from TAC 2009/2010.
    Prints one summary file per topic, nested inside outputs/<output_folder>/.
    Runs ROUGE and prints a results file to results/<output_folder>_rouge_scores.out.

    Args:
        file_path: str, path on patas to the directory holding training or testing data
        output_folder: name of the folder to write summaries to
        test_type: either 'dev' for devtest data or 'eval' for evaltest data
        stemming: bool, True stores a stemmed representation of each sentence
            in objects and tokens; False does nothing
        lower: bool, True stores each sentence in lower case; False does nothing
        idf_type: str, dictates the idf representation in objects. Options:
            'smooth_idf', 'probabilistic_idf', 'standard_idf', and 'unary_idf'
        tf_type: str, dictates the tf representation in objects. Options:
            'term_frequency', 'log_normalization'
        d: damping factor, how much to prioritize topic bias in the Markov matrix
        intersent_threshold: minimum similarity required for inclusion in the
            similarity matrix
        summary_threshold: maximum similarity allowed between sentences in the summary
        epsilon: minimum difference between probabilities across rounds of the
            power method (convergence criterion)
        mle_lambda: how much to prioritize topic MLE over sentence MLE
        k: maximum number of intersentential similarity nodes to connect when
            using normalized generation probability
        min_sent_len: minimum number of words a sentence needs to be used in the summary
        include_narrative: True if the narrative (in addition to the title)
            should be in the bias
        bias_formula: formula for sentence-topic similarity weighting:
            'cos' (cosine similarity), 'rel' (relevance), or 'gen' (generation probability)
        intersent_formula: formula for inter-sentential similarity weighting:
            'cos' (cosine similarity) or 'norm' (normalized generation probability)
        info_order_type: 'entity' or 'chron'
        num_permutations: int, how many SVM permutations
        remove_header: bool, True if the header should be removed in sentence compression
        remove_parens: bool, True if parenthetical information should be removed in sentence compression
        remove_quotes: bool, True if unpaired quotes should be removed in sentence compression
        remove_appos: bool, True if appositional modifiers should be removed in sentence compression
        remove_advcl: bool, True if adverbial clause modifiers should be removed in sentence compression
        remove_relcl: bool, True if relative clause modifiers should be removed in sentence compression
        remove_acl: bool, True if finite or non-finite clausal modifiers should be removed in sentence compression

    Returns:
        topic_list: the modified topic_list from the input, with a list of
            selected sentences in the topic.summary field of each topic.
""" # Read in input data # and handle content realization as a pre-processing step # and return a list of Topic objects (with Documents/Sentences) topics = get_data(file_path, stemming, lower, idf_type, tf_type, remove_header, remove_parens, remove_quotes, remove_appos, remove_advcl, remove_relcl, remove_acl) # topics = get_data(file_path, stemming, lower, idf_type, tf_type) summarize_topics_list(topics, output_folder, test_type, d, intersent_threshold, summary_threshold, epsilon, mle_lambda, k, min_sent_len, include_narrative, bias_formula, intersent_formula, info_order_type, num_permutations)
'''block = QAM_values[:symbol_length]
stream = p.open(format=pa.paFloat32, channels=1, rate=Fs, output=True,
                frames_per_buffer=int(transmit_block_length),
                stream_callback=callback)
stream = p.open(format=pa.paFloat32, channels=1, rate=Fs, output=True)'''

# Build the list of QAM values for the data
data_bits = data.get_data(filename)
frame_length_bits = symbol_length * 2 * QAM
transmit_frames = int(np.ceil(len(data_bits) / frame_length_bits))
frame_length_samples = int(Fs / dF) + Lp
QAM_values = data.modulate(data_bits, QAM, frame_length_bits)
print(QAM_values)
transmit = np.zeros(transmit_frames * frame_length_samples, dtype=np.float32)

if Modulation_type_OFDM:
    print("Starting OFDM")
    for i in range(transmit_frames):
        '''stream.write(volume*np.tile(encode.OFDM(block, 350, Fc, Fs, dF),4))'''
        # Assumed completion of the truncated slice assignment: each frame of
        # the output buffer is filled with the OFDM-encoded symbols for that
        # frame, following the encode.OFDM call in the comment above.
        transmit[i * frame_length_samples:(i + 1) * frame_length_samples] = \
            encode.OFDM(QAM_values[i * symbol_length:(i + 1) * symbol_length],
                        Lp, Fc, Fs, dF)
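
# A hedged playback sketch based on the commented-out pyaudio calls above;
# it assumes `p` is a pyaudio.PyAudio() instance and `volume` a float gain,
# as used elsewhere in this script.
stream = p.open(format=pa.paFloat32, channels=1, rate=Fs, output=True)
stream.write((volume * transmit).astype(np.float32).tobytes())
stream.stop_stream()
stream.close()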
}

# TUNED CONFIG_2009 PARAMETER VALUES
mle_lambda_tuned = 0.6
k_tuned = 9

# TUNED INFO ORDERING PARAMETER VALUES
num_permutations_tuned_dict = {
    "form_D2": 5,
    "form_2005": 5,
    "form_2009": 20
}

##### Get the Data #####
topics = get_data(input_path, stemming_tuned, lower_tuned, idf_type_tuned,
                  tf_type_tuned)

# Run tests for each of the three formula configurations
config_D2 = ["form_D2", "cos", "cos"]       # what we ran in D2
config_2005 = ["form_2005", "rel", "cos"]   # the formulas from the Otterbacher 2005 paper
config_2009 = ["form_2009", "gen", "norm"]  # the formulas from the Otterbacher 2009 paper
config_list = [config_D2, config_2005, config_2009]

for config in config_list:
    form, bias_formula, intersent_formula = config
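    # A hedged sketch of what each configuration run might look like; the call
    # mirrors summarize_topics_list's signature elsewhere in this repo, and the
    # remaining *_tuned names are assumed to be defined above alongside
    # mle_lambda_tuned and k_tuned.
    num_permutations = num_permutations_tuned_dict[form]
    summarize_topics_list(topics, form, test_type_tuned, d_tuned,
                          intersent_threshold_tuned, summary_threshold_tuned,
                          epsilon_tuned, mle_lambda_tuned, k_tuned,
                          min_sent_len_tuned, include_narrative_tuned,
                          bias_formula, intersent_formula,
                          info_order_type_tuned, num_permutations)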
import tensorflow as tf
import numpy as np
from network import gen_graph
from utils import cal_acc
import os
from config import get_config
from data_input import get_data

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
para, _ = get_config()

print('Creating input pipeline...')
# data entries are terminated with a '#'
train_feature, train_label = get_data(para.file_train, para.batch_size)
valid_feature, valid_label = get_data(para.file_valid, para.batch_size)
print('Success.')

train_data_num = para.train_data_num
valid_data_num = para.valid_data_num
total_steps = int(para.total_epochs * train_data_num / para.batch_size)
epoch_learning_rate = para.ini_learning_rate

# Create the graph ----------------------------------------------------------
inputs_placeholder = tf.placeholder(tf.float32, shape=[None, para.data_shape])
label_placeholder = tf.placeholder(tf.float32, shape=[None, 2])
global_step = tf.Variable(0, trainable=False, name='global_step')
loss, network_output, loss_visible = gen_graph(inputs_placeholder, label_placeholder, 2)
optimizer = tf.train.AdamOptimizer(epoch_learning_rate)
train_op = optimizer.minimize(loss, global_step)
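
# A minimal TF1 training-loop sketch using the graph built above; the
# session/feed pattern is an assumption about how this script continues.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(total_steps):
        # train_feature/train_label come from the input pipeline above, so we
        # evaluate them to numpy batches before feeding the placeholders
        batch_x, batch_y = sess.run([train_feature, train_label])
        _, loss_val = sess.run([train_op, loss],
                               feed_dict={inputs_placeholder: batch_x,
                                          label_placeholder: batch_y})
        if step % 100 == 0:
            print('step %d, loss %.4f' % (step, loss_val))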
start = time.time()
# whether to add a batch-norm layer
norm, epsilon = False, 0.001

# TRIGRAM_D = 21128
TRIGRAM_D = 100
# number of negative samples
NEG = 4
# query batch size
query_BS = 100
# total batch size
BS = query_BS * NEG

# read in the data
conf = Config()
data_train = data_input.get_data(conf.file_train)
data_vali = data_input.get_data(conf.file_vali)
# print(len(data_train['query']), query_BS, len(data_train['query']) / query_BS)
train_epoch_steps = int(len(data_train['query']) / query_BS) - 1
vali_epoch_steps = int(len(data_vali['query']) / query_BS) - 1


def variable_summaries(var, name):
    """Attach a lot of summaries to a Tensor."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean/' + name, mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        tf.summary.scalar('stddev/' + name, stddev)
        tf.summary.scalar('max/' + name, tf.reduce_max(var))
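
# A hedged usage example: attaching summaries to a hypothetical weight
# variable so it shows up in TensorBoard; `weight_1` and its shape are
# illustrative, not names from this script.
weight_1 = tf.Variable(tf.truncated_normal([TRIGRAM_D, 300], stddev=0.1))
variable_summaries(weight_1, 'L1_weights')
merged = tf.summary.merge_all()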
import ldpc_functions

# Fs = 44000
# dF = 16
# QAM = 1
# symbol_length = 1024
# Lp = 350
# Fc = 10050
# volume = 1.0

# Build the list of QAM values for the data
frame_length_bits = symbol_length * 2 * QAM
# data_bits = np.random.randint(2, size=frame_length_bits*100)  # random test sequence, 100 frames
data_bits = data.get_data(filename)  # [:1000*frame_length_bits]
# data_bits = np.ones(frame_length_bits*500, dtype=int)
data_bits = data_bits[:int(50 * frame_length_bits * (3 / 2))]

with open("start_bits.txt", 'w') as fout:
    for value in data_bits:
        fout.write(str(value) + '\n')

code_bits = ldpc_functions.encode(data_bits, standard='802.16', rate='2/3', ptype='A')
print(code_bits[:10])

transmit_frames = int(np.ceil(len(code_bits) / frame_length_bits))
frame_length_samples = int(Fs / dF) + Lp
QAM_values = data.modulate(code_bits, QAM, frame_length_bits)
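
# A quick sanity-check sketch: at rate 2/3 the encoder emits 3 code bits for
# every 2 data bits, so the encoded stream should be about 1.5x the input.
# The exact padding behaviour of ldpc_functions.encode is an assumption here.
expected_len = int(np.ceil(len(data_bits) * 3 / 2))
print('data bits: %d, code bits: %d (expected ~%d)'
      % (len(data_bits), len(code_bits), expected_len))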
> python app.py

Dash documentation: https://dash.plot.ly/
"""
import dash
from dash.dependencies import Input, Output, State, ClientsideFunction
import dash_table
import dash_html_components as html
import dash_core_components as dcc

from make_figures import make_map, make_timeplot
from data_input import get_data, get_mapping, tidy_most_recent

# Data
df = get_data()
mapping = get_mapping()
df_tidy = tidy_most_recent(df)
df_tidy_table = df_tidy[['country_region', 'value']]

# Figures
fig1 = make_map(df_tidy, mapping)
fig2 = make_timeplot(df)

# Markdown text
with open("text_block.md", "r") as f:
    intro_md = f.read()

app = dash.Dash(__name__)
server = app.server
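
# A minimal sketch of how the app might wire the pieces above into a layout
# and serve them; the component ids and layout structure are illustrative
# assumptions, not the app's actual layout.
app.layout = html.Div([
    dcc.Markdown(intro_md),
    dcc.Graph(id='map', figure=fig1),
    dcc.Graph(id='timeseries', figure=fig2),
])

if __name__ == '__main__':
    app.run_server(debug=True)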