def FromJSON(path: str, keywordsPath: str,
             on_generate: Callable[[Data, int, int], None] = None) -> DataSet:
    """Build a DataSet from a JSON list of raw articles, scored by keywords.

    Each raw entry's lower-cased text earns -1 per matching 'negative'
    keyword and +1 per matching 'positive' keyword; the net score is mapped
    to a category via DataSet.CategoryFromScore.

    Args:
        path: JSON file expected to contain a list of dicts with optional
            'source', 'title' and 'text' keys; non-list payloads and
            non-dict entries are ignored.
        keywordsPath: JSON file with optional 'positive'/'negative' keyword lists.
        on_generate: optional progress callback invoked as
            (result, index_of_processed_entry, total_raw_entries).

    Returns:
        A DataSet of one Data per valid raw entry.
    """
    with open(keywordsPath, 'r', encoding='utf-8') as f:
        keywords = json.load(f)
    with open(path, 'r', encoding='utf-8') as f:
        rawdata = json.load(f)
    if not isinstance(rawdata, list):
        rawdata = []
    # Lower-case the keyword lists once instead of per entry.
    negatives = [k.lower() for k in keywords.get('negative', [])]
    positives = [k.lower() for k in keywords.get('positive', [])]
    raw_len = len(rawdata)
    data = []
    # enumerate() replaces the hand-rolled counter; the index counts only
    # processed (dict) entries, matching the original semantics.
    for index, d in enumerate(e for e in rawdata if isinstance(e, dict)):
        source = d.get('source', '???').lower()
        title = d.get('title', '').lower()
        text = d.get('text', '').lower()
        score = (sum(1 for p in positives if p in text)
                 - sum(1 for n in negatives if n in text))
        category = DataSet.CategoryFromScore(score)
        result = Data(text, category, source, title, score)
        if on_generate is not None:
            # Called before the append, so index is 0-based progress.
            on_generate(result, index, raw_len)
        data.append(result)
    return DataSet(data)
def get_info():
    """Load the vehicle and order tables from disk and fill the module-level
    order_list, car_list and occupied collections.

    A vehicle counts as occupied when the current time falls between an
    order's start date and the end of its end date (end date inclusive,
    hence the one-day pad).
    """
    vehicle_headers = [
        'License Plate', 'Manufacturer', 'Model', 'Year', 'Location',
        'Category'
    ]
    vehicletable = Data('Vehicles.txt', vehicle_headers,
                        [str, str, str, int, str, str])

    order_headers = [
        'Order ID', 'Customer', 'Vehicle', 'Start Date', 'End Date',
        'Extra Insurance', 'GPS'
    ]
    ordertable = Data('Orders.txt', order_headers,
                      [ID, str, str, datetime, datetime, bool, bool])

    for row in ordertable.get_rows():
        order_list.append(row.values())
    for row in vehicletable.get_rows():
        car_list.append(row.values())

    for row in ordertable.get_rows():
        # row[3] = start date, row[4] = end date, row[2] = vehicle plate.
        start_ok = row[3].value() <= datetime.now()
        still_running = datetime.now() < row[4].value() + timedelta(days=1)
        if start_ok and still_running:
            occupied.append(row[2].value())
def FromAny(text: Union[str, Iterable[str], Iterable[Data], pd.DataFrame],
            category: Union[str, Iterable[str]] = None) -> DataSet:
    """Coerce any supported (text, category) combination into a DataSet.

    Supported combinations:
        <str, str>              -> single-entry DataSet
        <pd.DataFrame, any>     -> DataSet.FromDataFrame
        <DataSet, any>          -> returned unchanged
        <iterable, str>         -> one Data per item, shared category
        <iterable, iterable>    -> paired by index (text must be indexable)

    Raises:
        TypeError: if the combination is not supported. (TypeError is a
            subclass of Exception, so existing broad handlers still work.)
    """
    data = None
    if isinstance(text, str) and isinstance(category, str):
        data = DataSet([Data(text, category)])
    elif isinstance(text, pd.DataFrame):
        data = DataSet.FromDataFrame(text)
    elif isinstance(text, DataSet):
        data = text
    elif isinstance(text, IterableType):
        if isinstance(category, str):
            data = DataSet([Data(x, category) for x in text])
        elif isinstance(category, IterableType):
            data = DataSet(
                [Data(text[i], category[i]) for i in range(len(text))])
    # Compare against None, not truthiness: an *empty* DataSet is falsy but
    # is still a valid result and must not trigger the error below.
    if data is None:
        raise TypeError(
            'The given input is not supported: <{}, {}>.\nUse <str, str>, <str[], str|str[]>, <pandas.Dataframe[words, word_count] | Data[], Unknown>'
            .format(type(text), type(category)))
    return data
class Main():
    """Application root: builds the collaborating objects and drives the
    match / under-pricing pipeline based on marker files in the CWD."""

    #main holds the current running class
    # NOTE(review): this class attribute is immediately shadowed by the
    # `def main` method below, and then shadowed again per-instance by
    # `self.main = main` — after calling main(), `self.main` is no longer
    # callable. Confirm this aliasing is intentional.
    main = None
    #creates all class objects
    # NOTE(review): these run at class-definition (import) time, including
    # the os.listdir('databases') scan below — the directory must exist.
    config = Config()
    load_data = Load_data()
    data = Data()
    under = Under()
    #array for all databases
    databases_names = []
    for database_name in os.listdir('databases'):
        databases_names.append("databases/" + database_name)

    def main(self, main):
        """Entry point: store the running instance and kick off configuration,
        data setup and the marker-file driven pipeline."""
        self.main = main
        self.main.config.config(self.main)
        self.main.data.data(self.config)
        #self.main.load_data.load_data(self.main, self.config)
        self.main.check_load_files()

    def new_matches(self, name):
        """Ask the user whether to re-collect data by *name*; True on 'yes'."""
        if (input("Would you like to collect data by " + name +
                  " again: ").lower() == "yes"):
            return True
        else:
            return False

    def check_load_files(self):
        """Run permno/date matching and under-pricing steps unless marker
        files ('.matchesbypermno', '.up') indicate they were already done —
        in which case the user is prompted before re-running."""
        matchesbypermno = False
        up = False
        for file in os.listdir("."):
            if (file == ".matchesbypermno"):
                matchesbypermno = True
            elif (file == ".up"):
                up = True
        if (not matchesbypermno):
            self.main.data.match_by_permno_and_date_wrds_and_xls_or_txt(
                "permno", "permno", "crsp.wrds", "final.xls")
            # Matching invalidates any previous under-pricing result.
            up = False
        elif (self.new_matches("permno and date")):
            self.main.data.match_by_permno_and_date_wrds_and_xls_or_txt(
                "permno", "permno", "crsp.wrds", "final.xls")
            up = False
        if (not up):
            self.main.under.wrds_and_xml_or_txt_under(self.config, self.data)
        # NOTE(review): `new_up` is not defined anywhere in the visible
        # code — this branch would raise AttributeError if reached. Verify.
        elif (self.new_up("Under Pricing?")):
            self.main.under.wrds_and_xml_or_txt_under(self.config, self.data)
def FromDataFrame(dataframe: pd.DataFrame) -> DataSet:
    """Convert a word-count DataFrame into a DataSet.

    For each row, every column that is not the category/score column is
    treated as a word whose cell value is a repetition count; the column
    name repeated that many times is joined into the entry's text.
    (NOTE(review): `name * count` concatenates without separators between
    the repetitions — presumably intentional; confirm against callers.)
    The score is taken from the category column if present, else the score
    column, else 0, and mapped to a category via CategoryFromScore.
    """
    reserved = (Data.CATEGORY_NAME, Data.SCORE_NAME)
    entries = []
    for _, row in dataframe.iterrows():
        words = ' '.join(name * row[name]
                         for name in row.index
                         if name not in reserved)
        if Data.CATEGORY_NAME in row:
            score = row[Data.CATEGORY_NAME]
        elif Data.SCORE_NAME in row:
            score = row[Data.SCORE_NAME]
        else:
            score = 0
        entries.append(
            Data(words, category=DataSet.CategoryFromScore(score),
                 score=score))
    return DataSet(entries)
nun_days = 910 #numero de candles batch_size = 1 #divisao em blocos #¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨ #instanciar objetos """ Sobre os dados Estes dados são informações retiradas da BMF Bovespa, o periodo é Intraday,além das informações que formam um candlestick, são associados as colunas, informações de indicadores técnicos. Index(['Hora', 'dif', 'retracao +', 'retracao -', 'RSI', 'M22M44', 'M22M66', 'M66M44', 'ADX', 'ATR', 'Momentum', 'CCI', 'Bears', 'Bulls', 'Stock1', 'Stock2', 'Wilians', 'Std', 'MFI', 'target'], dtype='object') O rótulos são iformações que consideram a tendência do preços, 1: compra, 2: venda e 0:sem operação """ data = Data(nun_days, batch_size) entrada, entrada_trader, base, media, std = data.import_data() labels = Labels() data_labels = labels.index_labels(base, entrada) print('Nome das colunas: ', data_labels.columns) print('Quantidade de cada categória: ', data_labels.target.value_counts()) """ Normalização dos dados A padronização de dados dá aos dados média zero e variação unitária, é uma boa prática, especialmente para algoritmos como KNN, que é baseado na distância dos casos: """ #separando os dados colunas = [ 'Hora', 'dif', 'retracao +', 'retracao -', 'RSI', 'M22M44', 'M22M66', 'M66M44', 'ADX', 'ATR', 'Momentum', 'CCI', 'Bears', 'Bulls', 'Stock1',
import seaborn as sns sns.set() import matplotlib.pyplot as plt from Timer import Timer import lmfit # Construct discretized domain object for hybrid model domain = Domain(name='Domain') domain.add_axis(x_min=5, x_max=100, m=30, disc_by='FeretMean', name='FeretMean') # Create data-set and set up data-shuffler data = Data(case_id='Laboratory lactose case study') data.load_from_pickle( 'C:/Users/rfjoni/PycharmProjects/ParticleModel/projects/CACE_cases/CACE_lactose_study/lactose' ) data.batches[2].batch_id = 'Batch 1' data.batches[3].batch_id = 'Batch 2' # Convert time and temperature data to polynomial fit # Batch 1 t_batch1 = [ (measurement.time - data.batches[2].measurements[0].time).total_seconds() for measurement in data.batches[2].measurements ] T_batch1 = [ measurement.external_sensors[2].value for measurement in data.batches[2].measurements
def train():
    """Train a small CNN on the MNIST-style CSV data set, logging summaries
    for TensorBoard and finally visualizing the filters of conv_4.

    Relies on module-level constants TRAIN_SIZE, VALIDATION_SIZE, IMAGE_SIZE,
    N_CLASSES, LEARNING_RATE, LOG_DIR, EPOCHS, BATCH_SIZE, DROP_OUT and on
    the project's Data class. Uses the TensorFlow 1.x graph/session API.
    """
    data = Data()
    data.read_data(filepath='data/train.csv',
                   train_size=TRAIN_SIZE,
                   validation_size=VALIDATION_SIZE,
                   convert_to_one_hot=True)
    #data.train.display_digit()
    sess = tf.InteractiveSession()

    def variable_summaries(var):
        """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

    # Graph inputs: flat image vectors and one-hot labels.
    with tf.name_scope('input'):
        input_layer = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE])
        output_layer = tf.placeholder(tf.float32, shape=[None, N_CLASSES])
    with tf.name_scope('reshape_input'):
        # Assumes IMAGE_SIZE == 28*28 — TODO confirm against the constant.
        image_shaped_input = tf.reshape(input_layer, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input)

    def weight_variable(shape):
        # Small random init to break symmetry.
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        # Slightly positive bias to keep ReLUs active at start.
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def convolution_2d(input_tensor,
                       input_dimension,
                       nb_filter,
                       filter_size,
                       name,
                       activation=tf.nn.relu):
        """Conv layer with weight/bias summaries; conv2d below uses stride 2,
        so every conv layer also halves the spatial resolution."""
        with tf.name_scope(name):
            with tf.name_scope('weights'):
                weights = weight_variable(
                    [filter_size, filter_size, input_dimension, nb_filter])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([nb_filter])
                variable_summaries(biases)
            with tf.name_scope('preactivation'):
                preactivate = conv2d(input_tensor, weights) + biases  # !!!
                tf.summary.histogram('pre-activation', preactivate)
            activations = activation(preactivate, name='activation')
            tf.summary.histogram('activations', activations)
            return activations

    def conv2d(input_tensor, weights):
        # NOTE(review): stride 2 in a "plain" conv helper is unusual —
        # combined with the pooling layers this downsamples aggressively;
        # confirm it is intentional.
        return tf.nn.conv2d(input_tensor,
                            weights,
                            strides=[1, 2, 2, 1],
                            padding='SAME')

    def max_pool_2d(input_tensor, kernel_size, name):
        # NOTE(review): the kernel_size parameter is ignored — ksize is
        # hard-coded to 2x2 below.
        with tf.name_scope(name):
            return tf.nn.max_pool(
                input_tensor,
                ksize=[1, 2, 2, 1],  # kernel size?
                strides=[1, 2, 2, 1],
                padding='SAME')

    def fully_connected(input_tensor, image_size, nb_filter, n_units, name,
                        activation):
        """Dense layer over a flattened (image_size x image_size x nb_filter)
        input; pass activation='NONE' for a linear output."""
        with tf.name_scope(name):
            with tf.name_scope('weights'):
                weights = weight_variable(
                    [image_size * image_size * nb_filter, n_units])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([n_units])
                variable_summaries(biases)
            with tf.name_scope('preactivation'):
                input_tensor_flat = tf.reshape(
                    input_tensor, [-1, image_size * image_size * nb_filter])
                preactivate = tf.matmul(input_tensor_flat,
                                        weights) + biases  # same as convo
                tf.summary.histogram('pre-activation', preactivate)
            if activation == 'NONE':
                return preactivate
            else:
                activations = activation(preactivate, name='activation')
                tf.summary.histogram('activations', activations)
                return activations

    # Four strided convs + two pools, then three dense layers.
    with tf.name_scope('neural_network_architecture'):
        conv_1 = convolution_2d(image_shaped_input,
                                1,
                                nb_filter=16,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_1')
        conv_2 = convolution_2d(conv_1,
                                16,
                                nb_filter=32,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_2')
        pool_1 = max_pool_2d(conv_2, kernel_size=2, name='pool_layer_1')
        conv_3 = convolution_2d(pool_1,
                                32,
                                nb_filter=64,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_3')
        conv_4 = convolution_2d(conv_3,
                                64,
                                nb_filter=128,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_4')
        pool_2 = max_pool_2d(conv_4, kernel_size=2, name='pool_layer_2')
        # image_size=1: by here the spatial dims are assumed reduced to 1x1
        # — TODO confirm this matches the stride/pool arithmetic above.
        fc_1 = fully_connected(pool_2,
                               1,
                               nb_filter=128,
                               n_units=2048,
                               activation=tf.nn.relu,
                               name='fully_connected_1')
        fc_2 = fully_connected(fc_1,
                               1,
                               nb_filter=2048,
                               n_units=512,
                               activation=tf.nn.relu,
                               name='fully_connected_2')
    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        tf.summary.scalar('dropout_keep_probability', keep_prob)
        dropped = tf.nn.dropout(fc_2, keep_prob)
    # NOTE(review): the output layer applies tf.nn.softmax, but the loss
    # below is softmax_cross_entropy_with_logits, which expects *raw*
    # logits — this double-softmax likely hurts training. Verify.
    y = fully_connected(dropped,
                        1,
                        nb_filter=512,
                        n_units=10,
                        activation=tf.nn.softmax,
                        name='fully_connected_3')
    with tf.name_scope('loss_function'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=output_layer,
                                                       logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)
    with tf.name_scope('optimizer'):
        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
            cross_entropy)
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1),
                                          tf.argmax(output_layer, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(LOG_DIR + '/test')
    tf.global_variables_initializer().run()
    print("\nTraining the network...")
    t = trange(EPOCHS * data.train.images.shape[0] // BATCH_SIZE)
    for i in t:
        # selecting a batch
        # NOTE(review): batch_x/batch_y are never used — every sess.run
        # below feeds the *entire* train/validation set. Presumably the
        # feed_dicts were meant to use the batch; confirm.
        batch_x, batch_y = data.train.batch(BATCH_SIZE)
        # evaluating
        if i % 10 == 0:
            summary, acc = sess.run(
                [merged, accuracy],
                feed_dict={
                    input_layer: data.validation.images,
                    output_layer: data.validation.labels,
                    keep_prob: 1.0
                })
            test_writer.add_summary(summary, i)
            print('Accuracy at step %s: %s' % (i, acc))
        else:  # Record train set summaries, and train
            if i % 100 == 99:  # Record execution stats
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary, _ = sess.run(
                    [merged, train_step],
                    feed_dict={
                        input_layer: data.train.images,
                        output_layer: data.train.labels,
                        keep_prob: DROP_OUT
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                train_writer.add_summary(summary, i)
                print('Adding run metadata for', i)
            else:  # Record a summary
                summary, _ = sess.run(
                    [merged, train_step],
                    feed_dict={
                        input_layer: data.train.images,
                        output_layer: data.train.labels,
                        keep_prob: DROP_OUT
                    })
                train_writer.add_summary(summary, i)
    train_writer.close()
    test_writer.close()

    def getActivations(layer, stimuli):
        """Run a single stimulus through the graph and plot *layer*'s output."""
        units = sess.run(layer,
                         feed_dict={
                             input_layer: np.reshape(stimuli, [1, 784],
                                                     order='F'),
                             keep_prob: 1.0
                         })
        plotNNFilter(units)

    def plotNNFilter(units):
        """Show each channel of a (1, H, W, C) activation map on a grid."""
        filters = units.shape[3]
        plt.figure(1, figsize=(20, 20))
        n_columns = 6
        n_rows = math.ceil(filters / n_columns) + 1
        for i in range(filters):
            plt.subplot(n_rows, n_columns, i + 1)
            plt.title('Filter ' + str(i))
            plt.imshow(units[0, :, :, i], interpolation="nearest", cmap="gray")

    # Visualize one training digit and the conv_4 activations for it.
    imageToUse = data.train.images[0]
    data.train.display_digit()
    plt.imshow(np.reshape(imageToUse, [28, 28]),
               interpolation="nearest",
               cmap="gray")
    plt.show()
    #getActivations(conv_1, imageToUse)
    #getActivations(conv_2, imageToUse)
    #getActivations(conv_3, imageToUse)
    getActivations(conv_4, imageToUse)
    print('h')
    plt.show()
def start(self):
    """Build the multi-task TF1 graph (sentiment, topics, emotion, speech
    acts), wire losses/optimizer/accuracies, load the word2vec-backed data
    object, register the run and begin training via self.learn().

    Assumes self.params, self.cnn and self.runs were set up by the caller
    (e.g. __init__) — TODO confirm against the enclosing class.
    """
    # tf Graph
    # Input: (batch, posts, comments, word2vec_dim) embedded text.
    self.x = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["comment_padding_size"], self.params["word2vec_dim"]
    ],
                            name="input_x")
    # One label tensor per task, all shaped (batch, posts, n_classes_task).
    self.y_sentiment = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["n_classes_sentiment"]
    ],
                                      name="input_y_sentiment")
    self.y_topics = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["n_classes_topics"]
    ],
                                   name="input_y_topics")
    self.y_emotion = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["n_classes_emotion"]
    ],
                                    name="input_y_emotion")
    self.y_speech_acts = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["n_classes_speech_acts"]
    ],
                                        name="input_y_speech_acts")
    self.keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.sequence_length = tf.placeholder(tf.int32, [None])
    # Per-task output projection weights/biases handed to the LSTM model.
    fully_connected_params = {
        "out_sentiment_w":
        tf.Variable(
            tf.random_normal([
                self.params["n_hidden"], self.params["n_classes_sentiment"]
            ])),
        "out_topics_w":
        tf.Variable(
            tf.random_normal(
                [self.params["n_hidden"], self.params["n_classes_topics"]])),
        "out_emotion_w":
        tf.Variable(
            tf.random_normal([
                self.params["n_hidden"], self.params["n_classes_emotion"]
            ])),
        "out_speech_acts_w":
        tf.Variable(
            tf.random_normal([
                self.params["n_hidden"], self.params["n_classes_speech_acts"]
            ])),
        "out_sentiment_b":
        tf.Variable(tf.random_normal([self.params["n_classes_sentiment"]
                                      ])),
        "out_topics_b":
        tf.Variable(tf.random_normal([self.params["n_classes_topics"]])),
        "out_emotion_b":
        tf.Variable(tf.random_normal([self.params["n_classes_emotion"]])),
        "out_speech_acts_b":
        tf.Variable(
            tf.random_normal([self.params["n_classes_speech_acts"]]))
    }
    self.lstm = Lstm(params=self.params,
                     fully_connected_params=fully_connected_params)
    # get predictions
    # CNN encodes comments, LSTM runs over the post sequence.
    self.predictions = self.lstm.model(
        x=self.cnn.model(self.x, self.keep_prob),
        sequence_length=self.sequence_length,
        keep_prob=self.keep_prob)
    # define loss
    # Sentiment is single-label (softmax); the other three tasks are
    # multi-label (sigmoid).
    with tf.name_scope("loss_sentiment"):
        self.cost_sentiment = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.predictions["prediction_sentiment"],
                labels=self.y_sentiment))
    with tf.name_scope("loss_topics"):
        self.cost_topics = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.predictions["prediction_topics"],
                labels=self.y_topics))
    with tf.name_scope("loss_emotions"):
        self.cost_emotions = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.predictions["prediction_emotion"],
                labels=self.y_emotion))
    with tf.name_scope("loss_speech_acts"):
        self.cost_speech_acts = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.predictions["prediction_speech_acts"],
                labels=self.y_speech_acts))
    # define optimizer
    # One optimizer over the unweighted sum of the four task losses.
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.params["learning_rate"]).minimize(
            self.cost_sentiment + self.cost_topics + self.cost_emotions +
            self.cost_speech_acts)
    # evaluate model
    with tf.name_scope("accuracy_sentiment"):
        # argmax comparison: exact-class accuracy for the softmax task.
        correct_pred_sentiment = tf.equal(
            tf.argmax(self.predictions["prediction_sentiment"], 1),
            tf.argmax(self.y_sentiment, 1))
        self.accuracy_sentiment = tf.reduce_mean(
            tf.cast(correct_pred_sentiment, tf.float32))
    with tf.name_scope("accuracy_topics"):
        # Rounded-sigmoid comparison: per-label accuracy for multi-label tasks.
        correct_pred_topics = tf.equal(
            tf.round(tf.nn.sigmoid(self.predictions["prediction_topics"])),
            tf.round(self.y_topics))
        self.accuracy_topics = tf.reduce_mean(
            tf.cast(correct_pred_topics, tf.float32))
    with tf.name_scope("accuracy_emotion"):
        correct_pred_emotion = tf.equal(
            tf.round(tf.nn.sigmoid(
                self.predictions["prediction_emotion"])),
            tf.round(self.y_emotion))
        self.accuracy_emotion = tf.reduce_mean(
            tf.cast(correct_pred_emotion, tf.float32))
    with tf.name_scope("accuracy_speech_acts"):
        correct_pred_speech_acts = tf.equal(
            tf.round(
                tf.nn.sigmoid(self.predictions["prediction_speech_acts"])),
            tf.round(self.y_speech_acts))
        self.accuracy_speech_acts = tf.reduce_mean(
            tf.cast(correct_pred_speech_acts, tf.float32))
    # initializing the variables
    self.init = tf.global_variables_initializer()
    # 'Saver' op to save and restore all the variables
    self.saver = tf.train.Saver()
    # get data object
    self.data = Data(
        filename='data/word2vec/wiki.hr.vec',
        comment_padding_size=self.params["comment_padding_size"],
        post_padding_size=self.params["post_padding_size"],
        word2vec_dim=self.params["word2vec_dim"],
        binary_sentiment=self.params["binary_sentiment"])
    self.runs.create_run()
    # START LEARNING!!!
    self.learn()
'agglomeration': 1/1000, 'breakage': 1/1000}) # Define model system system = System(case="Laboratory lactose case study", domain=domain, ode_settings=ode_settings, loss_settings=loss_settings, rate_settings=rate_settings, dilution=False, regularization=1, normalize=True) # Adding sensors system.add_sensor(name='Temperature', measured=True, controlled=True, unit='C') system.add_sensor(name='Concentration', measured=True, controlled=False, unit='g/µL') # Activate phenomena system.activate_phenomena(['nucleation', 'growth']) # Create data-set and set up data-shuffler data = Data(case_id='Demo data') data.load_from_pickle('demo_data') time_series_pair = TimeSeriesPair(data=data, system=system) # Split training and validation data data.set_batch_pool(pool_batch_id=['Demo batch 0', 'Demo batch 1', 'Demo batch 2', 'Demo batch 3'], pool_type='Training') data.set_batch_pool(pool_batch_id=['Demo batch 4', 'Demo batch 5', 'Demo batch 6', 'Demo batch 7', 'Demo batch 8', 'Demo batch 9'], pool_type='Validation') data.set_batch_pool(pool_batch_id=['Demo batch 4'], pool_type='Test') # Set up hybrid training model hybrid_model = HybridModel(system=system) # Compile hybrid model hybrid_model.training_model.compile(loss=hybrid_model.loss_model.loss, optimizer='Adam') # Generate shuffled training and evaluation data