Example #1
def summarize_text(file_path, output_folder, test_type, stemming, lower,
                   idf_type, tf_type, d, intersent_threshold,
                   summary_threshold, epsilon, mle_lambda, k, min_sent_len,
                   include_narrative, bias_formula, intersent_formula,
                   info_order_type, num_permutations, remove_header,
                   remove_parens, remove_quotes, remove_appos, remove_advcl,
                   remove_relcl, remove_acl):
    """
    Creates extractive summaries (<= 100 words) of multi-document news sets from TAC 2009/2010
    Writes one summary file per topic inside outputs/<output_folder>/
    Runs ROUGE and writes a results file to results/<output_folder>_rouge_scores.out

    Args:
        file_path:str file path on patas leading to the directory that holds the training or testing data
        output_folder: name of folder to write summaries to
        test_type: either 'dev' for devtest data, or 'eval' for evaltest data
        stemming:bool True enables each sentence to be stored with a stem representation in objects and tokens, False does nothing
        lower:bool True enables each sentence to be stored in lower case, False does nothing.
        idf_type:str String input dictates idf representation in objects. Options are: 'smooth_idf', 'probabilistic_idf', 'standard_idf', and 'unary_idf'
        tf_type:str String input dictates tf representation in objects. Options are: 'term_frequency', 'log_normalization'
        d: damping factor, amount to prioritize topic bias in Markov Matrix
        intersent_threshold: minimum amount of similarity required to include in Similarity Matrix
        summary_threshold: maximum amount of similarity between sentences in summary
        epsilon: minimum amount of difference between probabilities between rounds of power method
        mle_lambda: amount to prioritize topic MLE over sentence MLE 
        k: maximum number of intersentential similarity nodes to connect when doing normalized generation probability
        min_sent_len: minimum number of words in a sentence to be used in the summary
        include_narrative: True if the narrative (in addition to title) should be in the bias
        bias_formula: which formula to use for sentence-topic similarity weighting - cos (cosine similarity), rel (relevance), or gen (generation probability)
        intersent_formula: which formula to use for inter-sentential similarity weighting - cos (cosine similarity) or norm (normalized generation probability)
        info_order_type:str information ordering strategy, either 'entity' or 'chron'
        num_permutations: int for how many SVM permutations
        remove_header:bool True if the header should be removed in sentence compression
        remove_parens:bool True if parenthetical information should be removed in sentence compression
        remove_quotes:bool True if unpaired quotes should be removed in sentence compression
        remove_appos:bool True if appositional modifier should be removed in sentence compression
        remove_advcl:bool True if adverbial clause modifier should be removed in sentence compression
        remove_relcl:bool True if relative clause modifier should be removed in sentence compression
        remove_acl:bool True if a finite or non-finite clausal modifier should be removed in sentence compression

    Returns:
        topic_list: the modified topic_list from the input, with a list of selected sentences
        in the topic.summary fields of each topic.

    """

    # Read in input data
    # and handle content realization as a pre-processing step
    # and return a list of Topic objects (with Documents/Sentences)
    topics = get_data(file_path, stemming, lower, idf_type, tf_type,
                      remove_header, remove_parens, remove_quotes,
                      remove_appos, remove_advcl, remove_relcl, remove_acl)
    #    topics = get_data(file_path, stemming, lower, idf_type, tf_type)

    summarize_topics_list(topics, output_folder, test_type, d,
                          intersent_threshold, summary_threshold, epsilon,
                          mle_lambda, k, min_sent_len, include_narrative,
                          bias_formula, intersent_formula, info_order_type,
                          num_permutations)
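
A call to this driver might look like the sketch below. The path, folder name, and most flag values are illustrative assumptions; only mle_lambda=0.6, k=9, and num_permutations=20 mirror the tuned values shown in Example #3.

# Hypothetical invocation (a sketch, not taken from the original project).
summarize_text(file_path="path/to/devtest_data",   # placeholder path
               output_folder="run_01", test_type="dev",
               stemming=True, lower=True,
               idf_type="smooth_idf", tf_type="term_frequency",
               d=0.15, intersent_threshold=0.1, summary_threshold=0.5,
               epsilon=0.001, mle_lambda=0.6, k=9, min_sent_len=8,
               include_narrative=True,
               bias_formula="gen", intersent_formula="norm",
               info_order_type="entity", num_permutations=20,
               remove_header=True, remove_parens=True, remove_quotes=True,
               remove_appos=False, remove_advcl=False, remove_relcl=False,
               remove_acl=False)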
Example #2
'''block = QAM_values[:symbol_length]

stream = p.open(format=pa.paFloat32,
				channels=1,
				rate=Fs, 
				output=True, 
				frames_per_buffer=int(transmit_block_length),
				stream_callback=callback
				)
stream = p.open(format=pa.paFloat32,
                channels=1,
                rate=Fs,
                output=True)'''

# Read the raw data bits from the file; they are modulated into QAM values below
data_bits = data.get_data(filename)
frame_length_bits = symbol_length * 2 * QAM
transmit_frames = int(np.ceil(len(data_bits) / frame_length_bits))
frame_length_samples = int(Fs / dF) + Lp

QAM_values = data.modulate(data_bits, QAM, frame_length_bits)

print(QAM_values)

transmit = np.zeros(transmit_frames * frame_length_samples, dtype=np.float32)

if Modulation_type_OFDM:
    print("Starting OFDM")
    for i in range(transmit_frames):
        '''stream.write(volume*np.tile(encode.OFDM(block, 350, Fc, Fs, dF),4))'''
        transmit[i * frame_length_samples:(i + 1) *
Example #3
    }

    # TUNED CONFIG_2009 PARAMETER VALUES
    mle_lambda_tuned = 0.6
    k_tuned = 9

    # TUNED INFO ORDERING PARAMETER VALUES
    num_permutations_tuned_dict = {
        "form_D2": 5,
        "form_2005": 5,
        "form_2009": 20
    }

    ##### Get the Data #####

    topics = get_data(input_path, stemming_tuned, lower_tuned, idf_type_tuned,
                      tf_type_tuned)

    # Run tests for each of the three formula configurations

    config_D2 = ["form_D2", "cos", "cos"]  # what we ran in D2
    config_2005 = ["form_2005", "rel",
                   "cos"]  # the formulas from the Otterbacher 2005 paper
    config_2009 = ["form_2009", "gen",
                   "norm"]  # the formulas from the Otterbacher 2009 paper
    config_list = [config_D2, config_2005, config_2009]

    for config in config_list:
        form = config[0]
        bias_formula = config[1]
        intersent_formula = config[2]
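
The loop body is cut off at this point in the excerpt; a plausible continuation, assuming the remaining *_tuned values are defined in the truncated configuration above and are passed straight through to summarize_topics_list from Example #1, might be:

        # Sketch only: every *_tuned name other than mle_lambda_tuned, k_tuned,
        # and num_permutations_tuned_dict is assumed to come from the truncated
        # configuration above; using `form` as the output folder is also a guess.
        summarize_topics_list(topics, form, "dev", d_tuned,
                              intersent_threshold_tuned, summary_threshold_tuned,
                              epsilon_tuned, mle_lambda_tuned, k_tuned,
                              min_sent_len_tuned, include_narrative_tuned,
                              bias_formula, intersent_formula,
                              info_order_type_tuned,
                              num_permutations_tuned_dict[form])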
Example #4
import tensorflow as tf
import numpy as np
from network import gen_graph
from utils import cal_acc
import os 
from config import get_config
from data_input import get_data 

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
para, _ = get_config()


print('Creating input pipeline...')  # the data ends with a '#' sign
train_feature, train_label = get_data(para.file_train, para.batch_size)
valid_feature, valid_label = get_data(para.file_valid, para.batch_size)
print('Success.')    

train_data_num = para.train_data_num
valid_data_num = para.valid_data_num

total_steps = int(para.total_epochs * train_data_num / para.batch_size)
epoch_learning_rate = para.ini_learning_rate

# create graph -------------------------------------------------------------
inputs_placeholder = tf.placeholder(tf.float32, shape=[None, para.data_shape])
label_placeholder = tf.placeholder(tf.float32, shape=[None, 2])
global_step = tf.Variable(0, trainable=False, name='global_step')

loss, network_output, loss_visible = gen_graph(inputs_placeholder, label_placeholder, 2)
optimizer = tf.train.AdamOptimizer(epoch_learning_rate)
train_op = optimizer.minimize(loss, global_step)
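
The excerpt stops after the graph is built; a minimal TF1-style training loop over this setup, assuming train_feature and train_label are batched tensors that can be evaluated with sess.run (and that any queue-based input pipeline is started separately), might look like:

# Sketch only: a generic TF1 training loop, not code from the original repository.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(total_steps):
        # Pull one batch from the input pipeline, then feed it to the placeholders.
        batch_x, batch_y = sess.run([train_feature, train_label])
        _, step_loss = sess.run([train_op, loss],
                                feed_dict={inputs_placeholder: batch_x,
                                           label_placeholder: batch_y})
        if step % 100 == 0:
            print('step %d, loss %.4f' % (step, step_loss))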
Example #5
start = time.time()
# whether to add a batch-normalization (BN) layer
norm, epsilon = False, 0.001

# TRIGRAM_D = 21128
TRIGRAM_D = 100
# number of negative samples
NEG = 4
# query batch size
query_BS = 100
# batch size
BS = query_BS * NEG

# read the data
conf = Config()
data_train = data_input.get_data(conf.file_train)
data_vali = data_input.get_data(conf.file_vali)
# print(len(data_train['query']), query_BS, len(data_train['query']) / query_BS)
train_epoch_steps = int(len(data_train['query']) / query_BS) - 1
vali_epoch_steps = int(len(data_vali['query']) / query_BS) - 1


def variable_summaries(var, name):
    """Attach a lot of summaries to a Tensor."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean/' + name, mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        tf.summary.scalar('stddev/' + name, stddev)
        tf.summary.scalar('max/' + name, tf.reduce_max(var))
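
The helper is cut off here; the usual variable_summaries pattern from the TensorFlow tutorials, which this function appears to follow, finishes with a min scalar and a histogram:

        # Assumed completion of the truncated helper (standard pattern,
        # not verified against the original source).
        tf.summary.scalar('min/' + name, tf.reduce_min(var))
        tf.summary.histogram(name, var)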
Example #6
import ldpc_functions

# Fs = 44000
# dF = 16
# QAM = 1
# symbol_length = 1024
# Lp = 350
# Fc = 10050
#
# volume = 1.0

# This is a list of QAM values of the data

frame_length_bits = symbol_length * 2 * QAM
#data_bits = np.random.randint(2,size=frame_length_bits*100)	#generate random sequence of length = 10 frame
data_bits = data.get_data(filename)  #[:1000*frame_length_bits]
#data_bits = np.ones(frame_length_bits*500, dtype=int)

data_bits = data_bits[:int(50 * frame_length_bits * (3 / 2))]
with open("start_bits.txt", 'w') as fout:
    for value in data_bits:
        fout.write(str(value) + '\n')
code_bits = ldpc_functions.encode(data_bits,
                                  standard='802.16',
                                  rate='2/3',
                                  ptype='A')
print(code_bits[:10])
transmit_frames = int(np.ceil(len(code_bits) / frame_length_bits))
frame_length_samples = int(Fs / dF) + Lp

QAM_values = data.modulate(code_bits, QAM, frame_length_bits)
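
Assuming the commented-out parameter values at the top of this example (Fs = 44000, dF = 16, QAM = 1, symbol_length = 1024, Lp = 350) are the ones in use, the two frame-size expressions above evaluate to:

# frame_length_bits    = symbol_length * 2 * QAM  = 1024 * 2 * 1      = 2048 bits
# frame_length_samples = int(Fs / dF) + Lp        = 44000 // 16 + 350 = 3100 samples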
Example #7
> python app.py

Dash documentation: https://dash.plot.ly/
"""

import dash
from dash.dependencies import Input, Output, State, ClientsideFunction
import dash_table
import dash_html_components as html
import dash_core_components as dcc
from make_figures import make_map, make_timeplot
from data_input import get_data, get_mapping, tidy_most_recent

# Data
df = get_data()
mapping = get_mapping()
df_tidy = tidy_most_recent(df)
df_tidy_table = df_tidy[['country_region', 'value']]

# Figures
fig1 = make_map(df_tidy, mapping)
fig2 = make_timeplot(df)

# Markdown text
with open("text_block.md", "r") as f:
    intro_md = f.read()

app = dash.Dash(__name__)
server = app.server
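
The excerpt ends right after the app object is created; a minimal continuation that places the markdown block and the two figures into a layout (a sketch of typical Dash usage, not this app's actual layout or callbacks) could look like:

# Sketch only: the layout structure and component ids are assumptions for illustration.
app.layout = html.Div([
    dcc.Markdown(intro_md),
    dcc.Graph(id='map', figure=fig1),
    dcc.Graph(id='timeplot', figure=fig2),
])

if __name__ == '__main__':
    app.run_server(debug=True)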