def FromJSON(path: str, keywordsPath: str,
             on_generate: Callable[[Data, int, int], None] = None) -> DataSet:
    """Build a DataSet from a JSON list of raw articles, scored by keywords.

    Each raw entry's lower-cased text earns -1 per matching 'negative'
    keyword and +1 per matching 'positive' keyword; the net score is mapped
    to a category via DataSet.CategoryFromScore.

    Args:
        path: JSON file expected to contain a list of dicts with optional
            'source', 'title' and 'text' keys; non-list payloads and
            non-dict entries are ignored.
        keywordsPath: JSON file with optional 'positive'/'negative' keyword lists.
        on_generate: optional progress callback invoked as
            (result, index_of_processed_entry, total_raw_entries).

    Returns:
        A DataSet of one Data per valid raw entry.
    """
    with open(keywordsPath, 'r', encoding='utf-8') as f:
        keywords = json.load(f)
    with open(path, 'r', encoding='utf-8') as f:
        rawdata = json.load(f)
    if not isinstance(rawdata, list):
        rawdata = []
    # Lower-case the keyword lists once instead of per entry.
    negatives = [k.lower() for k in keywords.get('negative', [])]
    positives = [k.lower() for k in keywords.get('positive', [])]
    raw_len = len(rawdata)
    data = []
    # enumerate() replaces the hand-rolled counter; the index counts only
    # processed (dict) entries, matching the original semantics.
    for index, d in enumerate(e for e in rawdata if isinstance(e, dict)):
        source = d.get('source', '???').lower()
        title = d.get('title', '').lower()
        text = d.get('text', '').lower()
        score = (sum(1 for p in positives if p in text)
                 - sum(1 for n in negatives if n in text))
        category = DataSet.CategoryFromScore(score)
        result = Data(text, category, source, title, score)
        if on_generate is not None:
            # Called before the append, so index is 0-based progress.
            on_generate(result, index, raw_len)
        data.append(result)
    return DataSet(data)
def get_info():
    """Load the vehicle and order tables from disk and fill the module-level
    order_list, car_list and occupied collections.

    A vehicle counts as occupied when the current time falls between an
    order's start date and the end of its end date (end date inclusive,
    hence the one-day pad).
    """
    vehicle_headers = [
        'License Plate', 'Manufacturer', 'Model', 'Year', 'Location',
        'Category'
    ]
    vehicletable = Data('Vehicles.txt', vehicle_headers,
                        [str, str, str, int, str, str])

    order_headers = [
        'Order ID', 'Customer', 'Vehicle', 'Start Date', 'End Date',
        'Extra Insurance', 'GPS'
    ]
    ordertable = Data('Orders.txt', order_headers,
                      [ID, str, str, datetime, datetime, bool, bool])

    for row in ordertable.get_rows():
        order_list.append(row.values())
    for row in vehicletable.get_rows():
        car_list.append(row.values())

    for row in ordertable.get_rows():
        # row[3] = start date, row[4] = end date, row[2] = vehicle plate.
        start_ok = row[3].value() <= datetime.now()
        still_running = datetime.now() < row[4].value() + timedelta(days=1)
        if start_ok and still_running:
            occupied.append(row[2].value())
def FromAny(text: Union[str, Iterable[str], Iterable[Data], pd.DataFrame],
            category: Union[str, Iterable[str]] = None) -> DataSet:
    """Coerce any supported (text, category) combination into a DataSet.

    Supported combinations:
        <str, str>              -> single-entry DataSet
        <pd.DataFrame, any>     -> DataSet.FromDataFrame
        <DataSet, any>          -> returned unchanged
        <iterable, str>         -> one Data per item, shared category
        <iterable, iterable>    -> paired by index (text must be indexable)

    Raises:
        TypeError: if the combination is not supported. (TypeError is a
            subclass of Exception, so existing broad handlers still work.)
    """
    data = None
    if isinstance(text, str) and isinstance(category, str):
        data = DataSet([Data(text, category)])
    elif isinstance(text, pd.DataFrame):
        data = DataSet.FromDataFrame(text)
    elif isinstance(text, DataSet):
        data = text
    elif isinstance(text, IterableType):
        if isinstance(category, str):
            data = DataSet([Data(x, category) for x in text])
        elif isinstance(category, IterableType):
            data = DataSet(
                [Data(text[i], category[i]) for i in range(len(text))])
    # Compare against None, not truthiness: an *empty* DataSet is falsy but
    # is still a valid result and must not trigger the error below.
    if data is None:
        raise TypeError(
            'The given input is not supported: <{}, {}>.\nUse <str, str>, <str[], str|str[]>, <pandas.Dataframe[words, word_count] | Data[], Unknown>'
            .format(type(text), type(category)))
    return data
class Main():
    """Application root: builds the collaborating objects and drives the
    match / under-pricing pipeline based on marker files in the CWD."""

    #main holds the current running class
    # NOTE(review): this class attribute is immediately shadowed by the
    # `def main` method below, and then shadowed again per-instance by
    # `self.main = main` — after calling main(), `self.main` is no longer
    # callable. Confirm this aliasing is intentional.
    main = None
    #creates all class objects
    # NOTE(review): these run at class-definition (import) time, including
    # the os.listdir('databases') scan below — the directory must exist.
    config = Config()
    load_data = Load_data()
    data = Data()
    under = Under()
    #array for all databases
    databases_names = []
    for database_name in os.listdir('databases'):
        databases_names.append("databases/" + database_name)

    def main(self, main):
        """Entry point: store the running instance and kick off configuration,
        data setup and the marker-file driven pipeline."""
        self.main = main
        self.main.config.config(self.main)
        self.main.data.data(self.config)
        #self.main.load_data.load_data(self.main, self.config)
        self.main.check_load_files()

    def new_matches(self, name):
        """Ask the user whether to re-collect data by *name*; True on 'yes'."""
        if (input("Would you like to collect data by " + name +
                  " again: ").lower() == "yes"):
            return True
        else:
            return False

    def check_load_files(self):
        """Run permno/date matching and under-pricing steps unless marker
        files ('.matchesbypermno', '.up') indicate they were already done —
        in which case the user is prompted before re-running."""
        matchesbypermno = False
        up = False
        for file in os.listdir("."):
            if (file == ".matchesbypermno"):
                matchesbypermno = True
            elif (file == ".up"):
                up = True
        if (not matchesbypermno):
            self.main.data.match_by_permno_and_date_wrds_and_xls_or_txt(
                "permno", "permno", "crsp.wrds", "final.xls")
            # Matching invalidates any previous under-pricing result.
            up = False
        elif (self.new_matches("permno and date")):
            self.main.data.match_by_permno_and_date_wrds_and_xls_or_txt(
                "permno", "permno", "crsp.wrds", "final.xls")
            up = False
        if (not up):
            self.main.under.wrds_and_xml_or_txt_under(self.config, self.data)
        # NOTE(review): `new_up` is not defined anywhere in the visible
        # code — this branch would raise AttributeError if reached. Verify.
        elif (self.new_up("Under Pricing?")):
            self.main.under.wrds_and_xml_or_txt_under(self.config, self.data)
def FromDataFrame(dataframe: pd.DataFrame) -> DataSet:
    """Convert a word-count DataFrame into a DataSet.

    For each row, every column that is not the category/score column is
    treated as a word whose cell value is a repetition count; the column
    name repeated that many times is joined into the entry's text.
    (NOTE(review): `name * count` concatenates without separators between
    the repetitions — presumably intentional; confirm against callers.)
    The score is taken from the category column if present, else the score
    column, else 0, and mapped to a category via CategoryFromScore.
    """
    reserved = (Data.CATEGORY_NAME, Data.SCORE_NAME)
    entries = []
    for _, row in dataframe.iterrows():
        words = ' '.join(name * row[name]
                         for name in row.index
                         if name not in reserved)
        if Data.CATEGORY_NAME in row:
            score = row[Data.CATEGORY_NAME]
        elif Data.SCORE_NAME in row:
            score = row[Data.SCORE_NAME]
        else:
            score = 0
        entries.append(
            Data(words, category=DataSet.CategoryFromScore(score),
                 score=score))
    return DataSet(entries)
nun_days = 910 #numero de candles batch_size = 1 #divisao em blocos #¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨ #instanciar objetos """ Sobre os dados Estes dados são informações retiradas da BMF Bovespa, o periodo é Intraday,além das informações que formam um candlestick, são associados as colunas, informações de indicadores técnicos. Index(['Hora', 'dif', 'retracao +', 'retracao -', 'RSI', 'M22M44', 'M22M66', 'M66M44', 'ADX', 'ATR', 'Momentum', 'CCI', 'Bears', 'Bulls', 'Stock1', 'Stock2', 'Wilians', 'Std', 'MFI', 'target'], dtype='object') O rótulos são iformações que consideram a tendência do preços, 1: compra, 2: venda e 0:sem operação """ data = Data(nun_days, batch_size) entrada, entrada_trader, base, media, std = data.import_data() labels = Labels() data_labels = labels.index_labels(base, entrada) print('Nome das colunas: ', data_labels.columns) print('Quantidade de cada categória: ', data_labels.target.value_counts()) """ Normalização dos dados A padronização de dados dá aos dados média zero e variação unitária, é uma boa prática, especialmente para algoritmos como KNN, que é baseado na distância dos casos: """ #separando os dados colunas = [ 'Hora', 'dif', 'retracao +', 'retracao -', 'RSI', 'M22M44', 'M22M66', 'M66M44', 'ADX', 'ATR', 'Momentum', 'CCI', 'Bears', 'Bulls', 'Stock1',
import seaborn as sns sns.set() import matplotlib.pyplot as plt from Timer import Timer import lmfit # Construct discretized domain object for hybrid model domain = Domain(name='Domain') domain.add_axis(x_min=5, x_max=100, m=30, disc_by='FeretMean', name='FeretMean') # Create data-set and set up data-shuffler data = Data(case_id='Laboratory lactose case study') data.load_from_pickle( 'C:/Users/rfjoni/PycharmProjects/ParticleModel/projects/CACE_cases/CACE_lactose_study/lactose' ) data.batches[2].batch_id = 'Batch 1' data.batches[3].batch_id = 'Batch 2' # Convert time and temperature data to polynomial fit # Batch 1 t_batch1 = [ (measurement.time - data.batches[2].measurements[0].time).total_seconds() for measurement in data.batches[2].measurements ] T_batch1 = [ measurement.external_sensors[2].value for measurement in data.batches[2].measurements
def train():
    """Train a small CNN on the MNIST-style CSV data set, logging summaries
    for TensorBoard and finally visualizing the filters of conv_4.

    Relies on module-level constants TRAIN_SIZE, VALIDATION_SIZE, IMAGE_SIZE,
    N_CLASSES, LEARNING_RATE, LOG_DIR, EPOCHS, BATCH_SIZE, DROP_OUT and on
    the project's Data class. Uses the TensorFlow 1.x graph/session API.
    """
    data = Data()
    data.read_data(filepath='data/train.csv',
                   train_size=TRAIN_SIZE,
                   validation_size=VALIDATION_SIZE,
                   convert_to_one_hot=True)
    #data.train.display_digit()
    sess = tf.InteractiveSession()

    def variable_summaries(var):
        """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

    # Graph inputs: flat image vectors and one-hot labels.
    with tf.name_scope('input'):
        input_layer = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE])
        output_layer = tf.placeholder(tf.float32, shape=[None, N_CLASSES])
    with tf.name_scope('reshape_input'):
        # Assumes IMAGE_SIZE == 28*28 — TODO confirm against the constant.
        image_shaped_input = tf.reshape(input_layer, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input)

    def weight_variable(shape):
        # Small random init to break symmetry.
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        # Slightly positive bias to keep ReLUs active at start.
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def convolution_2d(input_tensor,
                       input_dimension,
                       nb_filter,
                       filter_size,
                       name,
                       activation=tf.nn.relu):
        """Conv layer with weight/bias summaries; conv2d below uses stride 2,
        so every conv layer also halves the spatial resolution."""
        with tf.name_scope(name):
            with tf.name_scope('weights'):
                weights = weight_variable(
                    [filter_size, filter_size, input_dimension, nb_filter])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([nb_filter])
                variable_summaries(biases)
            with tf.name_scope('preactivation'):
                preactivate = conv2d(input_tensor, weights) + biases  # !!!
                tf.summary.histogram('pre-activation', preactivate)
            activations = activation(preactivate, name='activation')
            tf.summary.histogram('activations', activations)
            return activations

    def conv2d(input_tensor, weights):
        # NOTE(review): stride 2 in a "plain" conv helper is unusual —
        # combined with the pooling layers this downsamples aggressively;
        # confirm it is intentional.
        return tf.nn.conv2d(input_tensor,
                            weights,
                            strides=[1, 2, 2, 1],
                            padding='SAME')

    def max_pool_2d(input_tensor, kernel_size, name):
        # NOTE(review): the kernel_size parameter is ignored — ksize is
        # hard-coded to 2x2 below.
        with tf.name_scope(name):
            return tf.nn.max_pool(
                input_tensor,
                ksize=[1, 2, 2, 1],  # kernel size?
                strides=[1, 2, 2, 1],
                padding='SAME')

    def fully_connected(input_tensor, image_size, nb_filter, n_units, name,
                        activation):
        """Dense layer over a flattened (image_size x image_size x nb_filter)
        input; pass activation='NONE' for a linear output."""
        with tf.name_scope(name):
            with tf.name_scope('weights'):
                weights = weight_variable(
                    [image_size * image_size * nb_filter, n_units])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([n_units])
                variable_summaries(biases)
            with tf.name_scope('preactivation'):
                input_tensor_flat = tf.reshape(
                    input_tensor, [-1, image_size * image_size * nb_filter])
                preactivate = tf.matmul(input_tensor_flat,
                                        weights) + biases  # same as convo
                tf.summary.histogram('pre-activation', preactivate)
            if activation == 'NONE':
                return preactivate
            else:
                activations = activation(preactivate, name='activation')
                tf.summary.histogram('activations', activations)
                return activations

    # Four strided convs + two pools, then three dense layers.
    with tf.name_scope('neural_network_architecture'):
        conv_1 = convolution_2d(image_shaped_input,
                                1,
                                nb_filter=16,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_1')
        conv_2 = convolution_2d(conv_1,
                                16,
                                nb_filter=32,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_2')
        pool_1 = max_pool_2d(conv_2, kernel_size=2, name='pool_layer_1')
        conv_3 = convolution_2d(pool_1,
                                32,
                                nb_filter=64,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_3')
        conv_4 = convolution_2d(conv_3,
                                64,
                                nb_filter=128,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_4')
        pool_2 = max_pool_2d(conv_4, kernel_size=2, name='pool_layer_2')
        # image_size=1: by here the spatial dims are assumed reduced to 1x1
        # — TODO confirm this matches the stride/pool arithmetic above.
        fc_1 = fully_connected(pool_2,
                               1,
                               nb_filter=128,
                               n_units=2048,
                               activation=tf.nn.relu,
                               name='fully_connected_1')
        fc_2 = fully_connected(fc_1,
                               1,
                               nb_filter=2048,
                               n_units=512,
                               activation=tf.nn.relu,
                               name='fully_connected_2')
    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        tf.summary.scalar('dropout_keep_probability', keep_prob)
        dropped = tf.nn.dropout(fc_2, keep_prob)
    # NOTE(review): the output layer applies tf.nn.softmax, but the loss
    # below is softmax_cross_entropy_with_logits, which expects *raw*
    # logits — this double-softmax likely hurts training. Verify.
    y = fully_connected(dropped,
                        1,
                        nb_filter=512,
                        n_units=10,
                        activation=tf.nn.softmax,
                        name='fully_connected_3')
    with tf.name_scope('loss_function'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=output_layer,
                                                       logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)
    with tf.name_scope('optimizer'):
        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
            cross_entropy)
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1),
                                          tf.argmax(output_layer, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(LOG_DIR + '/test')
    tf.global_variables_initializer().run()
    print("\nTraining the network...")
    t = trange(EPOCHS * data.train.images.shape[0] // BATCH_SIZE)
    for i in t:
        # selecting a batch
        # NOTE(review): batch_x/batch_y are never used — every sess.run
        # below feeds the *entire* train/validation set. Presumably the
        # feed_dicts were meant to use the batch; confirm.
        batch_x, batch_y = data.train.batch(BATCH_SIZE)
        # evaluating
        if i % 10 == 0:
            summary, acc = sess.run(
                [merged, accuracy],
                feed_dict={
                    input_layer: data.validation.images,
                    output_layer: data.validation.labels,
                    keep_prob: 1.0
                })
            test_writer.add_summary(summary, i)
            print('Accuracy at step %s: %s' % (i, acc))
        else:  # Record train set summaries, and train
            if i % 100 == 99:  # Record execution stats
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary, _ = sess.run(
                    [merged, train_step],
                    feed_dict={
                        input_layer: data.train.images,
                        output_layer: data.train.labels,
                        keep_prob: DROP_OUT
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                train_writer.add_summary(summary, i)
                print('Adding run metadata for', i)
            else:  # Record a summary
                summary, _ = sess.run(
                    [merged, train_step],
                    feed_dict={
                        input_layer: data.train.images,
                        output_layer: data.train.labels,
                        keep_prob: DROP_OUT
                    })
                train_writer.add_summary(summary, i)
    train_writer.close()
    test_writer.close()

    def getActivations(layer, stimuli):
        """Run a single stimulus through the graph and plot *layer*'s output."""
        units = sess.run(layer,
                         feed_dict={
                             input_layer: np.reshape(stimuli, [1, 784],
                                                     order='F'),
                             keep_prob: 1.0
                         })
        plotNNFilter(units)

    def plotNNFilter(units):
        """Show each channel of a (1, H, W, C) activation map on a grid."""
        filters = units.shape[3]
        plt.figure(1, figsize=(20, 20))
        n_columns = 6
        n_rows = math.ceil(filters / n_columns) + 1
        for i in range(filters):
            plt.subplot(n_rows, n_columns, i + 1)
            plt.title('Filter ' + str(i))
            plt.imshow(units[0, :, :, i], interpolation="nearest", cmap="gray")

    # Visualize one training digit and the conv_4 activations for it.
    imageToUse = data.train.images[0]
    data.train.display_digit()
    plt.imshow(np.reshape(imageToUse, [28, 28]),
               interpolation="nearest",
               cmap="gray")
    plt.show()
    #getActivations(conv_1, imageToUse)
    #getActivations(conv_2, imageToUse)
    #getActivations(conv_3, imageToUse)
    getActivations(conv_4, imageToUse)
    print('h')
    plt.show()
def start(self):
    """Build the multi-task TF1 graph (sentiment, topics, emotion, speech
    acts), wire losses/optimizer/accuracies, load the word2vec-backed data
    object, register the run and begin training via self.learn().

    Assumes self.params, self.cnn and self.runs were set up by the caller
    (e.g. __init__) — TODO confirm against the enclosing class.
    """
    # tf Graph
    # Input: (batch, posts, comments, word2vec_dim) embedded text.
    self.x = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["comment_padding_size"], self.params["word2vec_dim"]
    ],
                            name="input_x")
    # One label tensor per task, all shaped (batch, posts, n_classes_task).
    self.y_sentiment = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["n_classes_sentiment"]
    ],
                                      name="input_y_sentiment")
    self.y_topics = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["n_classes_topics"]
    ],
                                   name="input_y_topics")
    self.y_emotion = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["n_classes_emotion"]
    ],
                                    name="input_y_emotion")
    self.y_speech_acts = tf.placeholder("float", [
        None, self.params["post_padding_size"],
        self.params["n_classes_speech_acts"]
    ],
                                        name="input_y_speech_acts")
    self.keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.sequence_length = tf.placeholder(tf.int32, [None])
    # Per-task output projection weights/biases handed to the LSTM model.
    fully_connected_params = {
        "out_sentiment_w":
        tf.Variable(
            tf.random_normal([
                self.params["n_hidden"], self.params["n_classes_sentiment"]
            ])),
        "out_topics_w":
        tf.Variable(
            tf.random_normal(
                [self.params["n_hidden"], self.params["n_classes_topics"]])),
        "out_emotion_w":
        tf.Variable(
            tf.random_normal([
                self.params["n_hidden"], self.params["n_classes_emotion"]
            ])),
        "out_speech_acts_w":
        tf.Variable(
            tf.random_normal([
                self.params["n_hidden"], self.params["n_classes_speech_acts"]
            ])),
        "out_sentiment_b":
        tf.Variable(tf.random_normal([self.params["n_classes_sentiment"]
                                      ])),
        "out_topics_b":
        tf.Variable(tf.random_normal([self.params["n_classes_topics"]])),
        "out_emotion_b":
        tf.Variable(tf.random_normal([self.params["n_classes_emotion"]])),
        "out_speech_acts_b":
        tf.Variable(
            tf.random_normal([self.params["n_classes_speech_acts"]]))
    }
    self.lstm = Lstm(params=self.params,
                     fully_connected_params=fully_connected_params)
    # get predictions
    # CNN encodes comments, LSTM runs over the post sequence.
    self.predictions = self.lstm.model(
        x=self.cnn.model(self.x, self.keep_prob),
        sequence_length=self.sequence_length,
        keep_prob=self.keep_prob)
    # define loss
    # Sentiment is single-label (softmax); the other three tasks are
    # multi-label (sigmoid).
    with tf.name_scope("loss_sentiment"):
        self.cost_sentiment = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.predictions["prediction_sentiment"],
                labels=self.y_sentiment))
    with tf.name_scope("loss_topics"):
        self.cost_topics = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.predictions["prediction_topics"],
                labels=self.y_topics))
    with tf.name_scope("loss_emotions"):
        self.cost_emotions = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.predictions["prediction_emotion"],
                labels=self.y_emotion))
    with tf.name_scope("loss_speech_acts"):
        self.cost_speech_acts = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.predictions["prediction_speech_acts"],
                labels=self.y_speech_acts))
    # define optimizer
    # One optimizer over the unweighted sum of the four task losses.
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.params["learning_rate"]).minimize(
            self.cost_sentiment + self.cost_topics + self.cost_emotions +
            self.cost_speech_acts)
    # evaluate model
    with tf.name_scope("accuracy_sentiment"):
        # argmax comparison: exact-class accuracy for the softmax task.
        correct_pred_sentiment = tf.equal(
            tf.argmax(self.predictions["prediction_sentiment"], 1),
            tf.argmax(self.y_sentiment, 1))
        self.accuracy_sentiment = tf.reduce_mean(
            tf.cast(correct_pred_sentiment, tf.float32))
    with tf.name_scope("accuracy_topics"):
        # Rounded-sigmoid comparison: per-label accuracy for multi-label tasks.
        correct_pred_topics = tf.equal(
            tf.round(tf.nn.sigmoid(self.predictions["prediction_topics"])),
            tf.round(self.y_topics))
        self.accuracy_topics = tf.reduce_mean(
            tf.cast(correct_pred_topics, tf.float32))
    with tf.name_scope("accuracy_emotion"):
        correct_pred_emotion = tf.equal(
            tf.round(tf.nn.sigmoid(
                self.predictions["prediction_emotion"])),
            tf.round(self.y_emotion))
        self.accuracy_emotion = tf.reduce_mean(
            tf.cast(correct_pred_emotion, tf.float32))
    with tf.name_scope("accuracy_speech_acts"):
        correct_pred_speech_acts = tf.equal(
            tf.round(
                tf.nn.sigmoid(self.predictions["prediction_speech_acts"])),
            tf.round(self.y_speech_acts))
        self.accuracy_speech_acts = tf.reduce_mean(
            tf.cast(correct_pred_speech_acts, tf.float32))
    # initializing the variables
    self.init = tf.global_variables_initializer()
    # 'Saver' op to save and restore all the variables
    self.saver = tf.train.Saver()
    # get data object
    self.data = Data(
        filename='data/word2vec/wiki.hr.vec',
        comment_padding_size=self.params["comment_padding_size"],
        post_padding_size=self.params["post_padding_size"],
        word2vec_dim=self.params["word2vec_dim"],
        binary_sentiment=self.params["binary_sentiment"])
    self.runs.create_run()
    # START LEARNING!!!
    self.learn()
'agglomeration': 1/1000, 'breakage': 1/1000}) # Define model system system = System(case="Laboratory lactose case study", domain=domain, ode_settings=ode_settings, loss_settings=loss_settings, rate_settings=rate_settings, dilution=False, regularization=1, normalize=True) # Adding sensors system.add_sensor(name='Temperature', measured=True, controlled=True, unit='C') system.add_sensor(name='Concentration', measured=True, controlled=False, unit='g/µL') # Activate phenomena system.activate_phenomena(['nucleation', 'growth']) # Create data-set and set up data-shuffler data = Data(case_id='Demo data') data.load_from_pickle('demo_data') time_series_pair = TimeSeriesPair(data=data, system=system) # Split training and validation data data.set_batch_pool(pool_batch_id=['Demo batch 0', 'Demo batch 1', 'Demo batch 2', 'Demo batch 3'], pool_type='Training') data.set_batch_pool(pool_batch_id=['Demo batch 4', 'Demo batch 5', 'Demo batch 6', 'Demo batch 7', 'Demo batch 8', 'Demo batch 9'], pool_type='Validation') data.set_batch_pool(pool_batch_id=['Demo batch 4'], pool_type='Test') # Set up hybrid training model hybrid_model = HybridModel(system=system) # Compile hybrid model hybrid_model.training_model.compile(loss=hybrid_model.loss_model.loss, optimizer='Adam') # Generate shuffled training and evaluation data