    configuration = getattr(configurations_0, args.proto)()
    # added by Zhaopeng Tu, 2016-05-12
    if args.state:
        configuration.update(eval(open(args.state).read()))
    logger.info("\nModel options:\n{}".format(pprint.pformat(configuration)))

    rng = numpy.random.RandomState(1234)

    enc_dec = EncoderDecoder(rng, **configuration)
    enc_dec.build_sampler()

    # added by Zhaopeng Tu, 2016-05-27
    # options to use other trained models
    if args.model:
        enc_dec.load(path=args.model)
    else:
        enc_dec.load(path=configuration['saveto_best'])

    test_align = Align(enc_dec=enc_dec, **configuration)
    test_src = get_stream(args.source, configuration['vocab_src'],
                          **configuration)
    test_trg = get_stream(args.target, configuration['vocab_trg'],
                          **configuration)
    aligner = Aligner(align_model=test_align,
                      test_src=test_src,
                      test_trg=test_trg,
                      **configuration)

    aligner.apply(args.alignment, True)
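For context, a minimal command-line front end that would supply the `args` used above might look like the sketch below; the option names are inferred from the attributes the script reads and are assumptions, not part of the original code.

# Hypothetical argument parser; option names are inferred from the `args.*`
# attributes accessed above and may not match the real driver script.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(
        description="Force-align a parallel corpus with a trained encoder-decoder model.")
    parser.add_argument('--proto', required=True,
                        help="name of a configuration function in the configurations module")
    parser.add_argument('--state', help="optional file with configuration overrides (a Python dict literal)")
    parser.add_argument('--model', help="path to a trained model; defaults to configuration['saveto_best']")
    parser.add_argument('--source', required=True, help="source-side text file")
    parser.add_argument('--target', required=True, help="target-side text file")
    parser.add_argument('--alignment', required=True, help="output file for the word alignments")
    return parser.parse_args()

args = parse_args()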
Example 2
def get_external_metadata(inputfile):

    # define an empty python dictionary
    md = {}

    # Check whether this file exists.
    if not os.path.exists(inputfile):
        return md
            
    # Get the other metadata field parameters
    md['file_name'] = os.path.basename(inputfile)
    md['file_size'] = str(os.path.getsize(inputfile))
    md['crc'] = fileEnstoreChecksum(inputfile)

    # Quit here if file type is not ".root"

    if not inputfile.endswith('.root'):
        return md

    # Root checks.

    file = ROOT.TFile.Open(larbatch_posix.root_stream(inputfile))
    if file and file.IsOpen() and not file.IsZombie():

        # Root file opened successfully.
        # Get number of events.
            
        obj = file.Get('Events')
        if obj and obj.InheritsFrom('TTree'):

            # This has a TTree named Events.

            nev = obj.GetEntriesFast()
            md['events'] = str(nev)

        # Get runs and subruns from the SubRuns tree.

        subrun_tree = file.Get('SubRuns')
        if subrun_tree and subrun_tree.InheritsFrom('TTree'):
            md['subruns'] = []
            nsubruns = subrun_tree.GetEntriesFast()
            tfr = ROOT.TTreeFormula('subruns',
                                    'SubRunAuxiliary.id_.run_.run_',
                                    subrun_tree)
            tfs = ROOT.TTreeFormula('subruns',
                                    'SubRunAuxiliary.id_.subRun_',
                                    subrun_tree)
            for entry in range(nsubruns):
                subrun_tree.GetEntry(entry)
                run = tfr.EvalInstance64()
                subrun = tfs.EvalInstance64()
                run_subrun = (run, subrun)
                if not run_subrun in md['subruns']:
                    md['subruns'].append(run_subrun)

        # Get stream name.

        try:
            stream_name = stream.get_stream(inputfile)
            md['data_stream'] = stream_name
        except:
            pass

    return md
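A small, hypothetical driver for `get_external_metadata` (the input file name is illustrative only) could dump the collected metadata as JSON:

# Hypothetical usage; the file name below is an example only.
import json

if __name__ == '__main__':
    md = get_external_metadata('example_reco.root')
    print(json.dumps(md, indent=2, sort_keys=True))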
Example 3
def build_and_run(save_to, modelconfig, experimentconfig):
    """Part of this is adapted from the Lasagne tutorial."""

    n = modelconfig['depth']
    num_filters = modelconfig['num_filters']
    image_size = modelconfig['image_size']
    num_blockstack = modelconfig['num_blockstack']

    print("Number of bottlenecks: %d" % n)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('image_features')
    #target_value = T.ivector('targets')
    target_var = T.lmatrix('targets')
    target_vec = T.extra_ops.to_one_hot(target_var[:,0],2)
    #target_var = T.matrix('targets')
    # Create residual net model
    print("Building model...")
    network = build_cnn(input_var, image_size, n, num_blockstack, num_filters)
    get_info(network)
    prediction = lasagne.utils.as_theano_expression(lasagne.layers.get_output(network))
    test_prediction = lasagne.utils.as_theano_expression(lasagne.layers.get_output(network,deterministic=True))

    # Loss function -> The objective to minimize 
    print("Instanciation of loss function...")
 
    #loss = CategoricalCrossEntropy().apply(target_var.flatten(), prediction)
    #test_loss = CategoricalCrossEntropy().apply(target_var.flatten(), test_prediction)
 #   loss = lasagne.objectives.categorical_crossentropy(prediction, target_var.flatten()).mean()
  #  test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var.flatten()).mean()
    loss = lasagne.objectives.squared_error(prediction,target_vec).mean()
    test_loss = lasagne.objectives.squared_error(test_prediction,target_vec).mean()
  #  loss = tensor.nnet.binary_crossentropy(prediction, target_var).mean()
  #  test_loss = tensor.nnet.binary_crossentropy(test_prediction, target_var).mean()
    test_loss.name = "loss"

#    loss.name = 'x-ent_error'
#    loss.name = 'sqr_error'
    layers = lasagne.layers.get_all_layers(network)

    #l1 and l2 regularization
    #pondlayers = {x:0.000025 for i,x in enumerate(layers)}
    #l1_penality = lasagne.regularization.regularize_layer_params_weighted(pondlayers, lasagne.regularization.l2)
    #l2_penality = lasagne.regularization.regularize_layer_params(layers[len(layers)/4:], lasagne.regularization.l1) * 25e-6
    #reg_penalty = l1_penality + l2_penality
    #reg_penalty.name = 'reg_penalty'
    #loss = loss + reg_penalty
    loss.name = 'reg_loss'
    error_rate = MisclassificationRate().apply(target_var.flatten(), test_prediction).copy(
            name='error_rate')

    
    # Load the dataset
    print("Loading data...")
    istest = 'test' in experimentconfig.keys()
    if istest:
        print("Using test stream")
    train_stream, valid_stream, test_stream = get_stream(experimentconfig['batch_size'],image_size,test=istest)

    # Defining step rule and algorithm
    if 'step_rule' in experimentconfig and experimentconfig['step_rule'] is not None:
        step_rule = experimentconfig['step_rule'](learning_rate=experimentconfig['learning_rate'])
    else:
        step_rule = Scale(learning_rate=experimentconfig['learning_rate'])

    params = map(lasagne.utils.as_theano_expression,lasagne.layers.get_all_params(network, trainable=True))
    print("Initializing algorithm")
    algorithm = GradientDescent(
                cost=loss, gradients={var:T.grad(loss,var) for var in params},#parameters=cg.parameters, #params
                step_rule=step_rule)

    #algorithm.add_updates(extra_updates)


    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = "grad_norm"

    print("Initializing extensions...")
    plot = Plot(save_to,
                channels=[['train_loss', 'valid_loss'],
                          ['train_grad_norm'],
                          # ['train_grad_norm', 'train_reg_penalty'],
                          ['train_error_rate', 'valid_error_rate']],
                server_url='http://hades.calculquebec.ca:5042')

    checkpoint = Checkpoint('models/best_' + save_to + '.tar')
    # checkpoint.add_condition(['after_n_batches=25'],
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord('valid_error_rate_best_so_far'))

    #Defining extensions
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=experimentconfig['num_epochs'],
                              after_n_batches=experimentconfig['num_batches']),
                  TrainingDataMonitoring([test_loss, error_rate, grad_norm], # reg_penalty],
                  prefix="train", after_epoch=True), #after_n_epochs=1
                  DataStreamMonitoring([test_loss, error_rate],valid_stream,prefix="valid", after_epoch=True), #after_n_epochs=1
                  plot,
                  #Checkpoint(save_to,after_n_epochs=5),
                  #ProgressBar(),
             #     Plot(save_to, channels=[['train_loss','valid_loss'], ['train_error_rate','valid_error_rate']], server_url='http://hades.calculquebec.ca:5042'), #'grad_norm'
                  #       after_batch=True),
                  Printing(after_epoch=True),
                  TrackTheBest('valid_error_rate',min), #Keep best
                  checkpoint,  #Save best
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=5)] # Early-stopping

 #   model = Model(loss)
 #   print("Model",model)


    main_loop = MainLoop(
        algorithm,
        train_stream,
       # model=model,
        extensions=extensions)
    print("Starting main loop...")

    main_loop.run()
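A hedged sketch of how the `build_and_run` above might be called; only the dictionary keys are taken from the function body, every concrete value (and the shape expected by get_stream/build_cnn for the image size) is an assumption:

# Illustrative call; the key names come from the code above, the values are assumptions.
modelconfig = {
    'depth': 3,              # number of bottlenecks per block stack
    'num_filters': 16,
    'image_size': 128,       # whatever build_cnn/get_stream expect for the image size
    'num_blockstack': 3,
}
experimentconfig = {
    'batch_size': 32,
    'num_epochs': 50,
    'num_batches': 100000,   # hard cap on training batches
    'learning_rate': 0.01,
    'step_rule': None,       # None falls back to Scale(learning_rate=...)
    # 'test': True,          # uncomment to use the small test stream
}
build_and_run('resnet_experiment', modelconfig, experimentconfig)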
import sqlite3
from sqlite3 import Error
import stream as st
from time import strftime

stream = st.create_stream()
tweets, dt, future = st.get_stream(stream)

conn = sqlite3.connect("assign2.db")
db = conn.cursor()
db.execute('drop table if exists HASHTAGS;')
db.execute(
    'create table HASHTAGS (tag_ID integer primary key autoincrement,hashtag text);'
)
conn.commit()

for tweet in tweets:
    if 'entities' in tweet:
        hashtags = tweet['entities']['hashtags']
        for hashtag in hashtags:
            # print "%s" % (hashtag['text'])
            db.execute('INSERT INTO HASHTAGS (hashtag) VALUES (?);',
                       [hashtag['text']])
            conn.commit()

cur = db.execute(
    'select hashtag, count(*) as count from HASHTAGS group by hashtag order by count desc limit 10;'
)
top10 = cur.fetchall()

fp = open('assign2-report.txt', 'w')
Example 5
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    #Define the parameters
    num_epochs = config['num_epochs']
    num_batches = config['num_batches']
    num_channels = config['num_channels']
    image_shape = config['image_shape']
    filter_size = config['filter_size']
    num_filter = config['num_filter']
    pooling_sizes = config['pooling_sizes']
    mlp_hiddens = config['mlp_hiddens']
    output_size = config['output_size']
    batch_size = config['batch_size']
    activation = config['activation']
    mlp_activation = config['mlp_activation']
    #    print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    #Create the symbolics variable
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    #Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    #Create the convolutions layers
    conv_layers = list(
        interleave([(Convolutional(filter_size=filter_size,
                                   num_filters=num_filter,
                                   name='conv_{}'.format(i))
                     for i, (filter_size,
                             num_filter) in enumerate(conv_parameters)),
                    (activation),
                    (MaxPooling(size, name='pool_{}'.format(i))
                     for i, size in enumerate(pooling_sizes))]))
    #    (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    #Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    #Initialize the convnet
    conv_sequence.initialize()
    #Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))
                    ] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation,
              top_mlp_dims,
              weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    #Initialize the MLP
    mlp.initialize()
    #Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)

    #Little trick to plot the error rate in two different plots (we can't use the same data twice in one plot, for an unknown reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    l2_penalty = T.sum([
        lambda_l2 * (W**2).sum() for i, W in enumerate(weights + biases)
    ])  # Gradually increase penalty for layer
    # # #l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
    # # #l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])
    #  l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    #  l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
    #  l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    #  algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg,
                                parameters=cg.parameters,
                                step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,
                                                         image_shape,
                                                         test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'], predicate=OnLogRecord('valid_error_rate_best_so_far'))
    #Adding a live plot with the bokeh server
    plot = Plot(
        label,
        channels=[
            ['train_error_rate', 'valid_error_rate'],
            ['valid_cost', 'valid_error_rate2'],
            # ['train_costreg','train_grad_norm']], #
            [
                'train_costreg', 'train_total_gradient_norm',
                'train_l2_penalty', 'train_l1_penalty'
            ]
        ],
        server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream,
                             prefix="valid"),
        TrainingDataMonitoring([
            costreg, error_rate, error_rate2, grad_norm, l2_penalty, l1_penalty
        ],
                               prefix="train",
                               after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  #Keep best
        checkpoint,  #Save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)
    ]  # Early-stopping
    model = Model(cost)
    main_loop = MainLoop(algorithm,
                         data_stream=train_stream,
                         model=model,
                         extensions=extensions)
    main_loop.run()
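A possible configuration for the `build_and_run` above; the key names come from the code, while the values and the Blocks activation bricks are assumptions (the activation lists must match the number of convolutional layers and MLP layers, respectively):

# Illustrative config; values and activation bricks are assumptions.
from blocks.bricks import Rectifier, Softmax

config = {
    'num_epochs': 100,
    'num_batches': 50000,                        # hard cap on training batches
    'num_channels': 3,
    'image_shape': (32, 32),
    'filter_size': [(5, 5), (3, 3)],
    'num_filter': [32, 64],
    'pooling_sizes': [(2, 2), (2, 2)],
    'mlp_hiddens': [500],
    'output_size': 2,
    'batch_size': 64,
    'activation': [Rectifier(), Rectifier()],    # one activation brick per conv layer
    'mlp_activation': [Rectifier(), Softmax()],  # len(mlp_hiddens) + 1 activations
}
build_and_run('convnet_experiment', config)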
Example 6
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    #Define the parameters
    num_epochs, num_batches, num_channels, image_shape = config['num_epochs'], config['num_batches'], config['num_channels'], config['image_shape']
    filter_size, num_filter, pooling_sizes, mlp_hiddens = config['filter_size'], config['num_filter'], config['pooling_sizes'], config['mlp_hiddens']
    output_size, batch_size, activation, mlp_activation = config['output_size'], config['batch_size'], config['activation'], config['mlp_activation']
#    print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    #Create the symbolics variable
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    #Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    #Create the convolutions layers
    conv_layers = list(interleave([(Convolutional(
                                      filter_size=filter_size,
                                      num_filters=num_filter,
                                      name='conv_{}'.format(i))
                    for i, (filter_size, num_filter)
                    in enumerate(conv_parameters)),
                  (activation),
            (MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))
        #    (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    #Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, weights_init=Uniform(width=0.2), biases_init=Constant(0.))
    #Initialize the convnet
    conv_sequence.initialize()
    #Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims, weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    #Initialize the MLP
    mlp.initialize()
    #Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)

    #Little trick to plot the error rate in two different plots (we can't use the same data twice in one plot, for an unknown reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
  # # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum() for i,W in enumerate(weights+biases)]) # Gradually increase penalty for layer
  # # #l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
  # # #l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1*T.abs_(z).sum() for z in weights+biases])
  #  l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer    
  #  l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
  #  l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'
    
    ########### DEFINE THE ALGORITHM #############
  #  algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters, step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,image_shape,test=istest)
    

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_'+label+'.tar')
    checkpoint.add_condition(['after_epoch'],
                         predicate=OnLogRecord('valid_error_rate_best_so_far'))
    #Adding a live plot with the bokeh server
    plot = Plot(label,
        channels=[['train_error_rate', 'valid_error_rate'],
                  ['valid_cost', 'valid_error_rate2'],
                 # ['train_costreg','train_grad_norm']], #  
                 ['train_costreg','train_total_gradient_norm','train_l2_penalty','train_l1_penalty']],
                  server_url="http://hades.calculquebec.ca:5042")  
   
    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                  after_n_batches=num_batches),
                  DataStreamMonitoring([cost, error_rate, error_rate2], valid_stream, prefix="valid"),
                  TrainingDataMonitoring([costreg, error_rate, error_rate2,
                    grad_norm,l2_penalty,l1_penalty],
                     prefix="train", after_epoch=True),
                  plot,
                  ProgressBar(),
                  Printing(),
                  TrackTheBest('valid_error_rate',min), #Keep best
                  checkpoint,  #Save best
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)] # Early-stopping                  
    model = Model(cost)
    main_loop = MainLoop(algorithm,data_stream=train_stream,model=model,extensions=extensions)
    main_loop.run()
Example 7
def build_and_run(experimentconfig, modelconfig, save_to=None): #modelconfig, 
    """ part of this is adapted from lasagne tutorial""" 
    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('image_features')
    target_var = T.lmatrix('targets')
    target_vec = T.extra_ops.to_one_hot(target_var[:,0],2)

    # Create vgg model
    print("Building model...")

    image_size = modelconfig['image_size']
    network = vgg16.build_small_model()
    prediction = lasagne.utils.as_theano_expression(lasagne.layers.get_output(network["prob"],input_var))
#    test_prediction = lasagne.layers.get_output(network["prob"],input_var,deterministic=True)

    # Loss function -> The objective to minimize 
    print("Instanciation of loss function...")
 
 #  loss = lasagne.objectives.categorical_crossentropy(prediction, target_var.flatten())
    loss = lasagne.objectives.squared_error(prediction,target_vec)
 #   test_loss = lasagne.objectives.squared_error(test_prediction,target_vec)
    loss = loss.mean()

   # layers = network.values()  
    #l1 and l2 regularization
   # pondlayers = {x:0.01 for x in layers}
   # l1_penality = lasagne.regularization.regularize_layer_params_weighted(pondlayers, lasagne.regularization.l2)
   # l2_penality = lasagne.regularization.regularize_layer_params(layers[len(layers)/4:], lasagne.regularization.l1) * 1e-4
   # reg_penalty = l1_penality + l2_penality
   # reg_penalty.name = 'reg_penalty'
    #loss = loss + reg_penalty
    loss.name = 'loss'

    error_rate = MisclassificationRate().apply(target_var.flatten(), prediction).copy(
            name='error_rate')

    # Load the dataset
    print("Loading data...")
    if 'test' in experimentconfig.keys() and experimentconfig['test'] is True:
        train_stream, valid_stream, test_stream = get_stream(experimentconfig['batch_size'],image_size,test=True)
    else :
        train_stream, valid_stream, test_stream = get_stream(experimentconfig['batch_size'],image_size,test=False)

    # Defining step rule and algorithm
    if 'step_rule' in experimentconfig and experimentconfig['step_rule'] is not None:
        step_rule = experimentconfig['step_rule'](learning_rate=experimentconfig['learning_rate'])
    else:
        step_rule = Scale(learning_rate=experimentconfig['learning_rate'])

    params = map(lasagne.utils.as_theano_expression,lasagne.layers.get_all_params(network['prob'], trainable=True))

    algorithm = GradientDescent(
                cost=loss, gradients={var:T.grad(loss,var) for var in params},
                step_rule=step_rule)

    grad_norm = aggregation.mean(algorithm.total_gradient_norm) 
    grad_norm.name='grad_norm'   

    print("Initializing extensions...")
    plot = Plot(save_to, channels=[['train_loss','valid_loss','train_grad_norm'],['train_error_rate','valid_error_rate']], server_url='http://hades.calculquebec.ca:5042')    
    checkpoint = Checkpoint('models/best_'+save_to+'.tar')
  #  checkpoint.add_condition(['after_n_batches=25'],
    checkpoint.add_condition(['after_epoch'],
                         predicate=OnLogRecord('valid_error_rate_best_so_far'))

    #Defining extensions
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=experimentconfig['num_epochs'],
                              after_n_batches=experimentconfig['num_batches']),
                  TrainingDataMonitoring([loss, error_rate, grad_norm], # reg_penalty is commented out above
                                         prefix="train", after_epoch=True), #after_n_epochs=1
                  DataStreamMonitoring([loss, error_rate],valid_stream,prefix="valid", after_epoch=True), #after_n_epochs=1
                  #Checkpoint(save_to,after_n_epochs=5),
                  #ProgressBar(),
                  plot,
                  #       after_batch=True),
                  Printing(after_epoch=True),
                  TrackTheBest('valid_error_rate',min), #Keep best
                  checkpoint,  #Save best
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=5)] # Early-stopping

   # model = Model(ComputationGraph(network))

    main_loop = MainLoop(
        algorithm,
        train_stream,
      #  model=model,
        extensions=extensions)
    print("Starting main loop...")

    main_loop.run()
Example 8
import sqlite3
from sqlite3 import Error
import stream as twitter_stream
from time import strftime


stream = twitter_stream.create_stream()
tweet_buff, date_time, future = twitter_stream.get_stream(stream)

connection = sqlite3.connect("tweets.db")
tweet_db = connection.cursor()
tweet_db.execute('drop table if exists POPULAR_HASHTAG;')
tweet_db.execute('create table POPULAR_HASHTAG (tag_ID integer primary key autoincrement,hashtag text);')
connection.commit()
fp = open('report.txt', 'w')

for tweet in tweet_buff:
    if 'entities' in tweet:
        hashtags = tweet['entities']['hashtags']
        for hashtag in hashtags:
            tweet_db.execute('INSERT INTO POPULAR_HASHTAG (hashtag) VALUES (?);', [hashtag['text']])
            connection.commit()

result = tweet_db.execute('select hashtag, count(*) as count from POPULAR_HASHTAG group by hashtag order by count desc limit 10;')
popHashtags = result.fetchall()

print('{}\t{}'.format(date_time,future))
fp.write('{}\t{}\n'.format(date_time,future))
for popHashtag in popHashtags:
    print(popHashtag[0].encode('utf-8') + '\t{}'.format(popHashtag[1]))  # encode so hashtags that are not in English are printed correctly
    fp.write(popHashtag[0].encode('utf-8') + '\t{}\n'.format(popHashtag[1]))
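Both sqlite scripts above rely on a local `stream` module; a skeleton of the interface they assume (inferred purely from how it is called, the real module presumably wraps a Twitter streaming client) might look like:

# stream.py -- hypothetical skeleton; only the two function names and their
# call/return shapes are inferred from the scripts above.
from time import strftime

def create_stream():
    """Open and return a handle to the tweet source (e.g. a Twitter streaming connection)."""
    raise NotImplementedError("wire this up to the real streaming client")

def get_stream(stream):
    """Collect tweets from the handle and return (tweets, date_time, future).

    tweets    -- a list of tweet dicts, each of which may carry an 'entities' key
    date_time -- a timestamp string that the scripts print and write to the report
    future    -- a second value written next to the timestamp; its meaning is not
                 recoverable from the scripts, so it stays a placeholder here
    """
    tweets = []
    date_time = strftime('%Y-%m-%d %H:%M:%S')
    future = None  # placeholder
    return tweets, date_time, future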