Example #1
def main():
    # Fetch input parameters.
    args = return_args()

    test_input = args.test_data_url

    X, y, x = Data.load_data(args.training_data_file, test_input)  # X, y: training set; x: test sample
    # Plot data
    Plot.plot_data(X, y)
    # SVM model.
    model = SVMClassifier().train(X, y)
    # print training details
    print('\nmodel.X: {}\nmodel.y: {}\nmodel.alphas: {}\nmodel.w: {}\n'.format(
        model.X, model.y, model.alphas, model.w))

    # prediction.
    prediction = SVMClassifier().classify(x, model)
    # print test details
    print('\nx: {}\nprediction: {}\n'.format(x, prediction))

    # Plot boundary
    Plot.plot_boundary(X, y, x, model)

    print('\nProcessed... {}\n\nURL Classified as: {}\n'.format(
        test_input, 'wrong url' if prediction == 0 else 'correct url'))
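
main() relies on a return_args() helper that the snippet does not show. A minimal sketch of what it might look like, assuming argparse and only the two attributes the function reads:

import argparse

def return_args():
    # Hypothetical reconstruction: only the attributes used above are declared.
    parser = argparse.ArgumentParser(description='SVM-based URL classifier')
    parser.add_argument('--training_data_file', required=True,
                        help='path to the labeled training data')
    parser.add_argument('--test_data_url', required=True,
                        help='URL to classify')
    return parser.parse_args()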
Example #2
    def FromJSON(path: str,
                 keywordsPath: str,
                 on_generate: Callable[[Data, int, int], None] = None) -> DataSet:
        rawdata = []
        keywords = {}
        with open(keywordsPath, 'r', encoding='utf-8') as f:
            keywords = json.load(f)
        with open(path, 'r', encoding='utf-8') as f:
            rawdata = json.load(f)
        if not isinstance(rawdata, list): rawdata = []
        data = []
        index = 0
        raw_len = len(rawdata)
        for d in rawdata:
            if not isinstance(d, dict): continue
            source = d.get('source', '???').lower()
            title = d.get('title', '').lower()
            text = d.get('text', '').lower()
            score = 0
            for negative in keywords.get('negative', []):
                if negative.lower() in text: score -= 1

            for positive in keywords.get('positive', []):
                if positive.lower() in text: score += 1
            category = DataSet.CategoryFromScore(score)
            result = Data(text, category, source, title, score)
            if on_generate is not None: on_generate(result, index, raw_len)
            data.append(result)
            index += 1
        return DataSet(data)
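
A hedged usage sketch for FromJSON; the file names and callback are assumptions, and FromJSON is assumed to be exposed on DataSet like FromDataFrame and FromAny below:

def report_progress(item, index, total):
    # hypothetical Callable[[Data, int, int], None] progress hook
    print('generated {}/{}'.format(index + 1, total))

dataset = DataSet.FromJSON('articles.json', 'keywords.json',
                           on_generate=report_progress)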
Example #3
 def __init__(self, conf=None):
     # default to None to avoid sharing one mutable dict across instances
     conf = conf if conf is not None else {}
     self.conf = conf
     self.dataHandler = Data(conf)
     self.allCode = self.dataHandler.get("allCode")
     self.date = conf.get('date', Date.getDate())  # crawl detail data for the specified date
     self.sourceName = conf.get('SOURCE_NAME')
     self.threadNum = int(conf.get('THREAD_NUM', THREAD_NUM))  # thread count for multi-threaded crawling
Example #4
 def FromAny(text: Union[str, Iterable[str], Iterable[Data], pd.DataFrame],
             category: Union[str, Iterable[str]] = None) -> DataSet:
     data = None
     if type(text) is str and type(category) is str:
         data = DataSet([Data(text, category)])
     elif isinstance(text, pd.DataFrame):
         data = DataSet.FromDataFrame(text)
     elif isinstance(text, DataSet):
         data = text
     elif isinstance(text, IterableType):
         if type(category) is str:
             data = DataSet([Data(x, category) for x in text])
         elif isinstance(category, IterableType):
             data = DataSet(
                 [Data(text[i], category[i]) for i in range(len(text))])
     if data is None:
         raise TypeError(
             'The given input is not supported: <{}, {}>.\nUse <str, str>, <str[], str|str[]>, <pandas.DataFrame[words, word_count] | Data[], Unknown>'
             .format(type(text), type(category)))
     return data
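
Hedged examples of the input shapes FromAny accepts; the texts, labels, and word_count_frame are made up:

ds_single = DataSet.FromAny('great product', 'positive')                  # <str, str>
ds_shared = DataSet.FromAny(['spam offer', 'click here'], 'negative')     # <str[], str>
ds_paired = DataSet.FromAny(['good', 'awful'], ['positive', 'negative'])  # <str[], str[]>
ds_frame = DataSet.FromAny(word_count_frame)  # hypothetical pandas.DataFrame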
Example #5
class Main():
    #main holds the current running class
    main = None

    #creates all class objects
    config = Config()
    load_data = Load_data()
    data = Data()
    under = Under()

    #array for all databases
    databases_names = []

    for database_name in os.listdir('databases'):
        databases_names.append("databases/" + database_name)

    def main(self, main):
        self.main = main
        self.main.config.config(self.main)
        self.main.data.data(self.config)
        #self.main.load_data.load_data(self.main, self.config)
        self.main.check_load_files()

    def new_matches(self, name):
        return input("Would you like to collect data by " + name +
                     " again: ").lower() == "yes"

    def check_load_files(self):
        matchesbypermno = False
        up = False
        for file in os.listdir("."):
            if (file == ".matchesbypermno"):
                matchesbypermno = True
            elif (file == ".up"):
                up = True
        if (not matchesbypermno):
            self.main.data.match_by_permno_and_date_wrds_and_xls_or_txt(
                "permno", "permno", "crsp.wrds", "final.xls")
            up = False
        elif (self.new_matches("permno and date")):
            self.main.data.match_by_permno_and_date_wrds_and_xls_or_txt(
                "permno", "permno", "crsp.wrds", "final.xls")
            up = False
        if (not up):
            self.main.under.wrds_and_xml_or_txt_under(self.config, self.data)
        elif (self.new_matches("Under Pricing?")):
            self.main.under.wrds_and_xml_or_txt_under(self.config, self.data)
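
check_load_files scans the working directory for the hidden marker files .matchesbypermno and .up; for ordinary marker files, a behavior-equivalent sketch of that check is:

import os

matchesbypermno = os.path.isfile(".matchesbypermno")  # permno matching already done?
up = os.path.isfile(".up")                            # underpricing step already done?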
Example #6
 def FromDataFrame(dataframe: pd.DataFrame) -> DataSet:
     data = []
     dataview = dataframe.loc
     for index in dataframe.index:
         row = dataview[index]
         # rebuild a text by repeating each word column by its count
         text = str.join(' ', [
             i * row[i] for i in row.index
             if i != Data.CATEGORY_NAME and i != Data.SCORE_NAME
         ])
         if Data.CATEGORY_NAME in row: score = row[Data.CATEGORY_NAME]
         elif Data.SCORE_NAME in row: score = row[Data.SCORE_NAME]
         else: score = 0
         category = DataSet.CategoryFromScore(score)
         data.append(Data(text, category=category, score=score))
     return DataSet(data)
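
A hedged usage sketch; the word-count frame is hypothetical, and every column other than the reserved category/score columns is treated as a word repeated by its count:

import pandas as pd

frame = pd.DataFrame([{'cheap': 2, 'offer': 1, Data.SCORE_NAME: -2}])
dataset = DataSet.FromDataFrame(frame)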
Example #7
def get_info():
    vehicletable = Data('Vehicles.txt', [
        'License Plate', 'Manufacturer', 'Model', 'Year', 'Location',
        'Category'
    ], [str, str, str, int, str, str])
    ordertable = Data('Orders.txt', [
        'Order ID', 'Customer', 'Vehicle', 'Start Date', 'End Date',
        'Extra Insurance', 'GPS'
    ], [ID, str, str, datetime, datetime, bool, bool])

    # order_list, car_list and occupied are module-level lists (defined elsewhere)
    for item in ordertable.get_rows():
        order_list.append(item.values())
    for item in vehicletable.get_rows():
        car_list.append(item.values())
    for item in ordertable.get_rows():
        # an order occupies its vehicle while 'now' lies in [start, end + 1 day)
        if item[3].value() <= datetime.now() < (item[4].value() +
                                                timedelta(days=1)):
            occupied.append(item[2].value())
Example #8
class MultiThreadDownloader:
    def __init__(self, conf=None):
        # default to None to avoid sharing one mutable dict across instances
        conf = conf if conf is not None else {}
        self.conf = conf
        self.dataHandler = Data(conf)
        self.allCode = self.dataHandler.get("allCode")
        self.date = conf.get('date', Date.getDate())  # crawl detail data for the specified date
        self.sourceName = conf.get('SOURCE_NAME')
        self.threadNum = int(conf.get('THREAD_NUM', THREAD_NUM))  # thread count for multi-threaded crawling

    def download(self):
        """
        >>> app=MultiThreadDownloader(conf)
        >>> app.stock.allCode

        >>> app.download()
        True
        """
        logging.debug("Start downloading data...\nCrawl mode is mutil.")
        conf = {}
        conf.update(self.conf)
        conf['handle'] = self.handle
        conf['date'] = self.date
        oQueue = queue.Queue()
        for code in self.allCode:
            if isinstance(code, int):
                code = Util.getCode(code)
            oQueue.put(code)
        for i in range(self.threadNum):
            conf["queue"] = oQueue
            multiThreadCrawlHandler = MultiThreadHandler(conf=conf)
            multiThreadCrawlHandler.setDaemon(True)
            multiThreadCrawlHandler.start()
        oQueue.join()
        return True

    def handle(self, code, date):
        # subclasses implement the actual per-code crawl logic
        raise NotImplementedError
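
handle() is the extension point each concrete downloader must implement; a minimal subclass sketch (the class name and body are assumptions):

class DetailDownloader(MultiThreadDownloader):
    def handle(self, code, date):
        # crawl and persist the detail record for one code on one date
        logging.debug("crawling %s on %s", code, date)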
Example #9
def train():
    data = Data()
    data.read_data(filepath='data/train.csv',
                   train_size=TRAIN_SIZE,
                   validation_size=VALIDATION_SIZE,
                   convert_to_one_hot=True)
    #data.train.display_digit()
    sess = tf.InteractiveSession()

    def variable_summaries(var):
        """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

    with tf.name_scope('input'):
        input_layer = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE])
        output_layer = tf.placeholder(tf.float32, shape=[None, N_CLASSES])

    with tf.name_scope('reshape_input'):
        image_shaped_input = tf.reshape(input_layer, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input)

    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def convolution_2d(input_tensor,
                       input_dimension,
                       nb_filter,
                       filter_size,
                       name,
                       activation=tf.nn.relu):
        with tf.name_scope(name):
            with tf.name_scope('weights'):
                weights = weight_variable(
                    [filter_size, filter_size, input_dimension, nb_filter])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([nb_filter])
                variable_summaries(biases)
            with tf.name_scope('preactivation'):
                preactivate = conv2d(input_tensor, weights) + biases  # strided conv, defined below
                tf.summary.histogram('pre-activation', preactivate)
            activations = activation(preactivate, name='activation')
            tf.summary.histogram('activations', activations)
            return activations

    def conv2d(input_tensor, weights):
        return tf.nn.conv2d(input_tensor,
                            weights,
                            strides=[1, 2, 2, 1],
                            padding='SAME')

    def max_pool_2d(input_tensor, kernel_size, name):
        with tf.name_scope(name):
            return tf.nn.max_pool(
                input_tensor,
                ksize=[1, kernel_size, kernel_size, 1],
                strides=[1, 2, 2, 1],
                padding='SAME')

    def fully_connected(input_tensor, image_size, nb_filter, n_units, name,
                        activation):
        with tf.name_scope(name):
            with tf.name_scope('weights'):
                weights = weight_variable(
                    [image_size * image_size * nb_filter, n_units])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([n_units])
                variable_summaries(biases)
            with tf.name_scope('preactivation'):
                input_tensor_flat = tf.reshape(
                    input_tensor, [-1, image_size * image_size * nb_filter])
                preactivate = tf.matmul(input_tensor_flat,
                                        weights) + biases  # same pattern as the conv layers
                tf.summary.histogram('pre-activation', preactivate)
            if activation == 'NONE':
                return preactivate
            else:
                activations = activation(preactivate, name='activation')
                tf.summary.histogram('activations', activations)
                return activations

    with tf.name_scope('neural_network_architecture'):
        conv_1 = convolution_2d(image_shaped_input,
                                1,
                                nb_filter=16,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_1')
        conv_2 = convolution_2d(conv_1,
                                16,
                                nb_filter=32,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_2')
        pool_1 = max_pool_2d(conv_2, kernel_size=2, name='pool_layer_1')
        conv_3 = convolution_2d(pool_1,
                                32,
                                nb_filter=64,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_3')
        conv_4 = convolution_2d(conv_3,
                                64,
                                nb_filter=128,
                                filter_size=3,
                                activation=tf.nn.relu,
                                name='convolutional_layer_4')
        pool_2 = max_pool_2d(conv_4, kernel_size=2, name='pool_layer_2')
        fc_1 = fully_connected(pool_2,
                               1,
                               nb_filter=128,
                               n_units=2048,
                               activation=tf.nn.relu,
                               name='fully_connected_1')
        fc_2 = fully_connected(fc_1,
                               1,
                               nb_filter=2048,
                               n_units=512,
                               activation=tf.nn.relu,
                               name='fully_connected_2')

        with tf.name_scope('dropout'):
            keep_prob = tf.placeholder(tf.float32)
            tf.summary.scalar('dropout_keep_probability', keep_prob)
            dropped = tf.nn.dropout(fc_2, keep_prob)

        # output raw logits: tf.nn.softmax_cross_entropy_with_logits below expects
        # unscaled logits, so applying tf.nn.softmax here as well squashed them twice
        y = fully_connected(dropped,
                            1,
                            nb_filter=512,
                            n_units=10,
                            activation='NONE',
                            name='fully_connected_3')

    with tf.name_scope('loss_function'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=output_layer,
                                                       logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('optimizer'):
        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
            cross_entropy)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1),
                                          tf.argmax(output_layer, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(LOG_DIR + '/test')
    tf.global_variables_initializer().run()

    print("\nTraining the network...")
    t = trange(EPOCHS * data.train.images.shape[0] // BATCH_SIZE)
    for i in t:
        # selecting a batch
        batch_x, batch_y = data.train.batch(BATCH_SIZE)
        # evaluating
        if i % 10 == 0:
            summary, acc = sess.run(
                [merged, accuracy],
                feed_dict={
                    input_layer: data.validation.images,
                    output_layer: data.validation.labels,
                    keep_prob: 1.0
                })
            test_writer.add_summary(summary, i)
            print('Accuracy at step %s: %s' % (i, acc))
        else:  # Record train set summaries, and train
            if i % 100 == 99:  # Record execution stats
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary, _ = sess.run(
                    [merged, train_step],
                    feed_dict={
                        input_layer: data.train.images,
                        output_layer: data.train.labels,
                        keep_prob: DROP_OUT
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                train_writer.add_summary(summary, i)
                print('Adding run metadata for', i)
            else:  # Record a summary
                summary, _ = sess.run(
                    [merged, train_step],
                    feed_dict={
                        input_layer: data.train.images,
                        output_layer: data.train.labels,
                        keep_prob: DROP_OUT
                    })
                train_writer.add_summary(summary, i)
    train_writer.close()
    test_writer.close()

    def getActivations(layer, stimuli):
        units = sess.run(layer,
                         feed_dict={
                             input_layer: np.reshape(stimuli, [1, 784],
                                                     order='F'),
                             keep_prob: 1.0
                         })
        plotNNFilter(units)

    def plotNNFilter(units):
        filters = units.shape[3]
        plt.figure(1, figsize=(20, 20))
        n_columns = 6
        n_rows = math.ceil(filters / n_columns) + 1
        for i in range(filters):
            plt.subplot(n_rows, n_columns, i + 1)
            plt.title('Filter ' + str(i))
            plt.imshow(units[0, :, :, i], interpolation="nearest", cmap="gray")

    imageToUse = data.train.images[0]
    data.train.display_digit()
    plt.imshow(np.reshape(imageToUse, [28, 28]),
               interpolation="nearest",
               cmap="gray")
    plt.show()
    #getActivations(conv_1, imageToUse)
    #getActivations(conv_2, imageToUse)
    #getActivations(conv_3, imageToUse)
    getActivations(conv_4, imageToUse)
    plt.show()
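
train() depends on several module-level constants that the snippet does not define. A hedged sketch; IMAGE_SIZE and N_CLASSES follow from the 28x28, 10-class input above, while the remaining values are placeholders:

IMAGE_SIZE = 28 * 28
N_CLASSES = 10
TRAIN_SIZE = 40000       # assumption
VALIDATION_SIZE = 2000   # assumption
BATCH_SIZE = 50          # assumption
EPOCHS = 1               # assumption
LEARNING_RATE = 1e-4     # assumption
DROP_OUT = 0.5           # assumption
LOG_DIR = 'logs'         # assumption

if __name__ == '__main__':
    train()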
Example #10
 def __init__(self):
     Data.__init__(self)
Example #11
    def start(self):
        # tf Graph
        self.x = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["comment_padding_size"], self.params["word2vec_dim"]
        ],
                                name="input_x")
        self.y_sentiment = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["n_classes_sentiment"]
        ],
                                          name="input_y_sentiment")
        self.y_topics = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["n_classes_topics"]
        ],
                                       name="input_y_topics")
        self.y_emotion = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["n_classes_emotion"]
        ],
                                        name="input_y_emotion")
        self.y_speech_acts = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["n_classes_speech_acts"]
        ],
                                            name="input_y_speech_acts")

        self.keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.sequence_length = tf.placeholder(tf.int32, [None])

        fully_connected_params = {
            "out_sentiment_w":
            tf.Variable(
                tf.random_normal([
                    self.params["n_hidden"], self.params["n_classes_sentiment"]
                ])),
            "out_topics_w":
            tf.Variable(
                tf.random_normal(
                    [self.params["n_hidden"],
                     self.params["n_classes_topics"]])),
            "out_emotion_w":
            tf.Variable(
                tf.random_normal([
                    self.params["n_hidden"], self.params["n_classes_emotion"]
                ])),
            "out_speech_acts_w":
            tf.Variable(
                tf.random_normal([
                    self.params["n_hidden"],
                    self.params["n_classes_speech_acts"]
                ])),
            "out_sentiment_b":
            tf.Variable(tf.random_normal([self.params["n_classes_sentiment"]
                                          ])),
            "out_topics_b":
            tf.Variable(tf.random_normal([self.params["n_classes_topics"]])),
            "out_emotion_b":
            tf.Variable(tf.random_normal([self.params["n_classes_emotion"]])),
            "out_speech_acts_b":
            tf.Variable(
                tf.random_normal([self.params["n_classes_speech_acts"]]))
        }

        self.lstm = Lstm(params=self.params,
                         fully_connected_params=fully_connected_params)

        # get predictions
        self.predictions = self.lstm.model(
            x=self.cnn.model(self.x, self.keep_prob),
            sequence_length=self.sequence_length,
            keep_prob=self.keep_prob)

        # define loss
        with tf.name_scope("loss_sentiment"):
            self.cost_sentiment = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.predictions["prediction_sentiment"],
                    labels=self.y_sentiment))

        with tf.name_scope("loss_topics"):
            self.cost_topics = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.predictions["prediction_topics"],
                    labels=self.y_topics))

        with tf.name_scope("loss_emotions"):
            self.cost_emotions = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.predictions["prediction_emotion"],
                    labels=self.y_emotion))

        with tf.name_scope("loss_speech_acts"):
            self.cost_speech_acts = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.predictions["prediction_speech_acts"],
                    labels=self.y_speech_acts))

        # define optimizer
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.params["learning_rate"]).minimize(
                self.cost_sentiment + self.cost_topics + self.cost_emotions +
                self.cost_speech_acts)

        # evaluate model
        with tf.name_scope("accuracy_sentiment"):
            correct_pred_sentiment = tf.equal(
                tf.argmax(self.predictions["prediction_sentiment"], 1),
                tf.argmax(self.y_sentiment, 1))
            self.accuracy_sentiment = tf.reduce_mean(
                tf.cast(correct_pred_sentiment, tf.float32))

        with tf.name_scope("accuracy_topics"):
            correct_pred_topics = tf.equal(
                tf.round(tf.nn.sigmoid(self.predictions["prediction_topics"])),
                tf.round(self.y_topics))
            self.accuracy_topics = tf.reduce_mean(
                tf.cast(correct_pred_topics, tf.float32))

        with tf.name_scope("accuracy_emotion"):
            correct_pred_emotion = tf.equal(
                tf.round(tf.nn.sigmoid(
                    self.predictions["prediction_emotion"])),
                tf.round(self.y_emotion))
            self.accuracy_emotion = tf.reduce_mean(
                tf.cast(correct_pred_emotion, tf.float32))

        with tf.name_scope("accuracy_speech_acts"):
            correct_pred_speech_acts = tf.equal(
                tf.round(
                    tf.nn.sigmoid(self.predictions["prediction_speech_acts"])),
                tf.round(self.y_speech_acts))
            self.accuracy_speech_acts = tf.reduce_mean(
                tf.cast(correct_pred_speech_acts, tf.float32))

        # initializing the variables
        self.init = tf.global_variables_initializer()
        # 'Saver' op to save and restore all the variables
        self.saver = tf.train.Saver()

        # get data object
        self.data = Data(
            filename='data/word2vec/wiki.hr.vec',
            comment_padding_size=self.params["comment_padding_size"],
            post_padding_size=self.params["post_padding_size"],
            word2vec_dim=self.params["word2vec_dim"],
            binary_sentiment=self.params["binary_sentiment"])

        self.runs.create_run()
        # START LEARNING!!!
        self.learn()
Example #12
                                              'agglomeration': 1/1000, 'breakage': 1/1000})

# Define model system
system = System(case="Laboratory lactose case study", domain=domain, ode_settings=ode_settings,
                loss_settings=loss_settings, rate_settings=rate_settings, dilution=False,
                regularization=1, normalize=True)

# Adding sensors
system.add_sensor(name='Temperature', measured=True, controlled=True, unit='C')
system.add_sensor(name='Concentration', measured=True, controlled=False, unit='g/µL')

# Activate phenomena
system.activate_phenomena(['nucleation', 'growth'])

# Create data-set and set up data-shuffler
data = Data(case_id='Demo data')
data.load_from_pickle('demo_data')
time_series_pair = TimeSeriesPair(data=data, system=system)

# Split training and validation data
data.set_batch_pool(pool_batch_id=['Demo batch 0', 'Demo batch 1', 'Demo batch 2', 'Demo batch 3'], pool_type='Training')
data.set_batch_pool(pool_batch_id=['Demo batch 4', 'Demo batch 5', 'Demo batch 6', 'Demo batch 7', 'Demo batch 8', 'Demo batch 9'], pool_type='Validation')
data.set_batch_pool(pool_batch_id=['Demo batch 4'], pool_type='Test')

# Set up hybrid training model
hybrid_model = HybridModel(system=system)

# Compile hybrid model
hybrid_model.training_model.compile(loss=hybrid_model.loss_model.loss, optimizer='Adam')

# Generate shuffled training and evaluation data
Example #13
class Stock(object):
    def __init__(self, conf=None):
        # default to None to avoid sharing one mutable dict across instances
        self.conf = conf if conf is not None else {}
        self.code = None
        self.date = None
        self.data = Data(self.conf)  # store and cache data

    def __iter__(self):
        """
        Support iteration over the stock.

            >>> stock.data.adv={'20110804':{'601919':{'close':'11.11'},'601920':{'close':'22.22'}}}
            >>> stock.date = None
            >>> check = True
            >>> for date in stock:
            ...    print(date)
            20110804
            >>> for code, price in stock['20110804']:
            ...     print(code, price)
            601919 11.11
            601920 22.22

        """
        if self.date:  # if a date is set, iterate the codes for that date; otherwise iterate all dates
            data = self.data.get(name="adv", conf={"date": self.date, "code": "all"})
            if data:
                result = [(code, data.get(code, {})) for code in data]
                if result:
                    return iter(result)
            return iter([])
        else:
            return iter(self.data.adv.keys())

    def __getitem__(self, value):
        """
        Accessing values via [] or setDate changes the stock's reference state; the index() method does not.

            >>> stock.data.adv={'20110805':{'601919':{'close':'10.0'},'601920':{'close':'22.22'}}}
            >>> stock['601919']['20110805'].close
            '10.0'

        """
        if len(value) == 6:
            self.code = value
        if len(value) == 8:
            self.date = value
        return self

    def __getattr__(self,value):
        """
        define some simple way to access data in stock.

            >>> len(stock.allCode)>1000 #and len(stock.allCode) == len(stock.info)
            True
            >>> len(stock.info) > 100
            True
            >>> stock.data.adv={'20110804':{'601919':{'close':'11.11','volume':'111','high':'12','low':'10',"sequence": [ 7.34]},'601920':{'close':'22.22'}}}
            >>> stock['601919']['20110804'].close
            '11.11'
            >>> stock.volume
            '111'
            >>> stock.high
            '12'
            >>> stock.low
            '10'
            >>> stock['20110804']['601919'].sequence
            [7.34]
        """
        result = self.data.get(name=value, conf={"date": self.date, "code": self.code})
        if result is None:
            return 0
        else:
            return result

    def __len__(self):
        """
        get code length in stock data.
            
            >>> len(stock) > 1000
            True

        """
        return len(self.allCode)

    def index(self,index):
        self.date=Date.getDate(index , self.date)
        return self

    def ma(self, dateRange):
        """
        Average share price over the given date range.
        """
        return self.data.get(name="ma", conf={"date": self.date, "code": self.code, "dateRange": dateRange}) or 0

    def max(self, dateRange):
        return self.data.get(name="max", conf={"date": self.date, "code": self.code, "dateRange": dateRange}) or 0

    def min(self, dateRange):
        return self.data.get(name="min", conf={"date": self.date, "code": self.code, "dateRange": dateRange}) or 0
Example #14
 def __init__(self, conf=None):
     # default to None to avoid sharing one mutable dict across instances
     self.conf = conf if conf is not None else {}
     self.code = None
     self.date = None
     self.data = Data(self.conf)  # store and cache data
Example #15
import seaborn as sns
sns.set()
import matplotlib.pyplot as plt
from Timer import Timer
import lmfit

# Construct discretized domain object for hybrid model
domain = Domain(name='Domain')
domain.add_axis(x_min=5,
                x_max=100,
                m=30,
                disc_by='FeretMean',
                name='FeretMean')

# Create data-set and set up data-shuffler
data = Data(case_id='Laboratory lactose case study')
data.load_from_pickle(
    'C:/Users/rfjoni/PycharmProjects/ParticleModel/projects/CACE_cases/CACE_lactose_study/lactose'
)
data.batches[2].batch_id = 'Batch 1'
data.batches[3].batch_id = 'Batch 2'

# Convert time and temperature data to polynomial fit
# Batch 1
t_batch1 = [
    (measurement.time - data.batches[2].measurements[0].time).total_seconds()
    for measurement in data.batches[2].measurements
]
T_batch1 = [
    measurement.external_sensors[2].value
    for measurement in data.batches[2].measurements
Example #16
nun_days = 910  # number of candles
batch_size = 1  # split into blocks
# --------------------------------------------------------------------
# instantiate objects
"""
Sobre os dados

Estes dados são informações retiradas da BMF Bovespa, o periodo é Intraday,além das informações que formam um candlestick, 
são associados as colunas, informações de indicadores técnicos.
Index(['Hora', 'dif', 'retracao +', 'retracao -', 'RSI', 'M22M44', 'M22M66',
       'M66M44', 'ADX', 'ATR', 'Momentum', 'CCI', 'Bears', 'Bulls', 'Stock1',
       'Stock2', 'Wilians', 'Std', 'MFI', 'target'],
      dtype='object')
O rótulos são iformações que consideram a tendência do preços, 1: compra, 2: venda e 0:sem operação
"""
data = Data(nun_days, batch_size)
entrada, entrada_trader, base, media, std = data.import_data()
labels = Labels()
data_labels = labels.index_labels(base, entrada)
print('Column names: ', data_labels.columns)
print('Count per category: ', data_labels.target.value_counts())
"""
Normalização dos dados

A padronização de dados dá aos dados média zero e variação unitária, é uma boa prática,
especialmente para algoritmos como KNN, que é baseado na distância dos casos:
"""
# splitting the data
colunas = [
    'Hora', 'dif', 'retracao +', 'retracao -', 'RSI', 'M22M44', 'M22M66',
    'M66M44', 'ADX', 'ATR', 'Momentum', 'CCI', 'Bears', 'Bulls', 'Stock1',
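
The snippet is truncated here. As a hedged illustration of the standardization described above, a minimal sketch using sklearn's StandardScaler on the feature columns:

from sklearn.preprocessing import StandardScaler

X = data_labels[colunas].values             # feature matrix from the columns above
X_norm = StandardScaler().fit_transform(X)  # zero mean, unit variance per column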
Example #17
class Model:
    def __init__(self, params):
        self.params = params
        self.cnn = Cnn(params)

        # helper class for storing run details
        self.runs = Runs()

    def start(self):
        # tf Graph
        self.x = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["comment_padding_size"], self.params["word2vec_dim"]
        ],
                                name="input_x")
        self.y_sentiment = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["n_classes_sentiment"]
        ],
                                          name="input_y_sentiment")
        self.y_topics = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["n_classes_topics"]
        ],
                                       name="input_y_topics")
        self.y_emotion = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["n_classes_emotion"]
        ],
                                        name="input_y_emotion")
        self.y_speech_acts = tf.placeholder("float", [
            None, self.params["post_padding_size"],
            self.params["n_classes_speech_acts"]
        ],
                                            name="input_y_speech_acts")

        self.keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.sequence_length = tf.placeholder(tf.int32, [None])

        fully_connected_params = {
            "out_sentiment_w":
            tf.Variable(
                tf.random_normal([
                    self.params["n_hidden"], self.params["n_classes_sentiment"]
                ])),
            "out_topics_w":
            tf.Variable(
                tf.random_normal(
                    [self.params["n_hidden"],
                     self.params["n_classes_topics"]])),
            "out_emotion_w":
            tf.Variable(
                tf.random_normal([
                    self.params["n_hidden"], self.params["n_classes_emotion"]
                ])),
            "out_speech_acts_w":
            tf.Variable(
                tf.random_normal([
                    self.params["n_hidden"],
                    self.params["n_classes_speech_acts"]
                ])),
            "out_sentiment_b":
            tf.Variable(tf.random_normal([self.params["n_classes_sentiment"]
                                          ])),
            "out_topics_b":
            tf.Variable(tf.random_normal([self.params["n_classes_topics"]])),
            "out_emotion_b":
            tf.Variable(tf.random_normal([self.params["n_classes_emotion"]])),
            "out_speech_acts_b":
            tf.Variable(
                tf.random_normal([self.params["n_classes_speech_acts"]]))
        }

        self.lstm = Lstm(params=self.params,
                         fully_connected_params=fully_connected_params)

        # get predictions
        self.predictions = self.lstm.model(
            x=self.cnn.model(self.x, self.keep_prob),
            sequence_length=self.sequence_length,
            keep_prob=self.keep_prob)

        # define loss
        with tf.name_scope("loss_sentiment"):
            self.cost_sentiment = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.predictions["prediction_sentiment"],
                    labels=self.y_sentiment))

        with tf.name_scope("loss_topics"):
            self.cost_topics = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.predictions["prediction_topics"],
                    labels=self.y_topics))

        with tf.name_scope("loss_emotions"):
            self.cost_emotions = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.predictions["prediction_emotion"],
                    labels=self.y_emotion))

        with tf.name_scope("loss_speech_acts"):
            self.cost_speech_acts = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.predictions["prediction_speech_acts"],
                    labels=self.y_speech_acts))

        # define optimizer
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.params["learning_rate"]).minimize(
                self.cost_sentiment + self.cost_topics + self.cost_emotions +
                self.cost_speech_acts)

        # evaluate model
        with tf.name_scope("accuracy_sentiment"):
            correct_pred_sentiment = tf.equal(
                tf.argmax(self.predictions["prediction_sentiment"], 1),
                tf.argmax(self.y_sentiment, 1))
            self.accuracy_sentiment = tf.reduce_mean(
                tf.cast(correct_pred_sentiment, tf.float32))

        with tf.name_scope("accuracy_topics"):
            correct_pred_topics = tf.equal(
                tf.round(tf.nn.sigmoid(self.predictions["prediction_topics"])),
                tf.round(self.y_topics))
            self.accuracy_topics = tf.reduce_mean(
                tf.cast(correct_pred_topics, tf.float32))

        with tf.name_scope("accuracy_emotion"):
            correct_pred_emotion = tf.equal(
                tf.round(tf.nn.sigmoid(
                    self.predictions["prediction_emotion"])),
                tf.round(self.y_emotion))
            self.accuracy_emotion = tf.reduce_mean(
                tf.cast(correct_pred_emotion, tf.float32))

        with tf.name_scope("accuracy_speech_acts"):
            correct_pred_speech_acts = tf.equal(
                tf.round(
                    tf.nn.sigmoid(self.predictions["prediction_speech_acts"])),
                tf.round(self.y_speech_acts))
            self.accuracy_speech_acts = tf.reduce_mean(
                tf.cast(correct_pred_speech_acts, tf.float32))

        # initializing the variables
        self.init = tf.global_variables_initializer()
        # 'Saver' op to save and restore all the variables
        self.saver = tf.train.Saver()

        # get data object
        self.data = Data(
            filename='data/word2vec/wiki.hr.vec',
            comment_padding_size=self.params["comment_padding_size"],
            post_padding_size=self.params["post_padding_size"],
            word2vec_dim=self.params["word2vec_dim"],
            binary_sentiment=self.params["binary_sentiment"])

        self.runs.create_run()
        # START LEARNING!!!
        self.learn()

    def learn(self):
        with tf.Session() as sess:
            self.sess = sess
            # initialize session
            sess.run(self.init)

            step = 1
            counter = 0
            for epoch in range(0, self.params["max_epoch"]):
                if epoch % self.params["evaluate_every"] == 0:
                    self.evaluate()
                    self.runs.save_model(sess=self.sess, saver=self.saver)
                batch_x = []
                batch_seq_length = []
                batch_y_sentiment = []
                batch_y_topics = []
                batch_y_emotions = []
                batch_y_speech_acts = []

                with open('data/threads/splits/split-0/train.txt',
                          encoding="UTF-8") as f:
                    for line in f:
                        x, sequence_length_next, y_sentiment_next, y_topics_next, y_emotion_next, y_speech_acts_next = \
                            self.data.get_next(line)
                        batch_x.append(x)
                        batch_seq_length.append(sequence_length_next)
                        batch_y_sentiment.append(y_sentiment_next)
                        batch_y_topics.append(y_topics_next)
                        batch_y_emotions.append(y_emotion_next)
                        batch_y_speech_acts.append(y_speech_acts_next)
                        counter += 1
                        if len(batch_x) == self.params["batch_size"]:
                            # turn input to np.array
                            batch_x = np.array(batch_x)
                            batch_y_sentiment = np.array(batch_y_sentiment)
                            batch_seq_length = np.array(batch_seq_length)
                            # reshape input
                            batch_x = batch_x.reshape(
                                (self.params["batch_size"],
                                 self.params["post_padding_size"],
                                 self.params["comment_padding_size"],
                                 self.params["word2vec_dim"]))
                            batch_y_sentiment = batch_y_sentiment.reshape(
                                (self.params["batch_size"],
                                 self.params["post_padding_size"],
                                 self.params["n_classes_sentiment"]))
                            # TRAIN HERE
                            sess.run(self.optimizer,
                                     feed_dict={
                                         self.x:
                                         batch_x,
                                         self.y_sentiment:
                                         batch_y_sentiment,
                                         self.y_topics:
                                         batch_y_topics,
                                         self.y_emotion:
                                         batch_y_emotions,
                                         self.y_speech_acts:
                                         batch_y_speech_acts,
                                         self.sequence_length:
                                         batch_seq_length,
                                         self.keep_prob:
                                         self.params["keep_prob_global_train"]
                                     })

                            step += 1

                            if step % self.params["display_step"] == 0:
                                # SENTIMENT
                                acc_sentiment = sess.run(
                                    self.accuracy_sentiment,
                                    feed_dict={
                                        self.x:
                                        batch_x,
                                        self.y_sentiment:
                                        batch_y_sentiment,
                                        self.sequence_length:
                                        batch_seq_length,
                                        self.keep_prob:
                                        self.params["keep_prob_global_train"]
                                    })
                                loss_sentiment = sess.run(
                                    self.cost_sentiment,
                                    feed_dict={
                                        self.x:
                                        batch_x,
                                        self.y_sentiment:
                                        batch_y_sentiment,
                                        self.sequence_length:
                                        batch_seq_length,
                                        self.keep_prob:
                                        self.params["keep_prob_global_train"]
                                    })
                                print("Epoch: " + str(epoch + 1) +
                                      " Iteration: " +
                                      str(step * self.params["batch_size"]))
                                print("[SENTIMENT]  " + " Minibatch Loss= "
                                      "{:.4f}".format(loss_sentiment) +
                                      ", Minibatch Accuracy= "
                                      "{:.4f}".format(acc_sentiment))

                                # EMOTIONS
                                acc_emotion = sess.run(
                                    self.accuracy_emotion,
                                    feed_dict={
                                        self.x:
                                        batch_x,
                                        self.y_emotion:
                                        batch_y_emotions,
                                        self.sequence_length:
                                        batch_seq_length,
                                        self.keep_prob:
                                        self.params["keep_prob_global_train"]
                                    })
                                loss_emotion = sess.run(
                                    self.cost_emotions,
                                    feed_dict={
                                        self.x:
                                        batch_x,
                                        self.y_emotion:
                                        batch_y_emotions,
                                        self.sequence_length:
                                        batch_seq_length,
                                        self.keep_prob:
                                        self.params["keep_prob_global_train"]
                                    })
                                print("[EMOTION]     " + " Minibatch Loss= "
                                      "{:.4f}".format(loss_emotion) +
                                      ", Minibatch Accuracy= "
                                      "{:.4f}".format(acc_emotion))

                                # TOPICS
                                acc_topics = sess.run(
                                    self.accuracy_topics,
                                    feed_dict={
                                        self.x:
                                        batch_x,
                                        self.y_topics:
                                        batch_y_topics,
                                        self.sequence_length:
                                        batch_seq_length,
                                        self.keep_prob:
                                        self.params["keep_prob_global_train"]
                                    })
                                loss_topics = sess.run(
                                    self.cost_topics,
                                    feed_dict={
                                        self.x:
                                        batch_x,
                                        self.y_topics:
                                        batch_y_topics,
                                        self.sequence_length:
                                        batch_seq_length,
                                        self.keep_prob:
                                        self.params["keep_prob_global_train"]
                                    })
                                print("[TOPICS]     " + " Minibatch Loss= "
                                      "{:.4f}".format(loss_topics) +
                                      ", Minibatch Accuracy= "
                                      "{:.4f}".format(acc_topics))

                                # SPEECH ACTS
                                acc_speech_acts = sess.run(
                                    self.accuracy_speech_acts,
                                    feed_dict={
                                        self.x:
                                        batch_x,
                                        self.y_speech_acts:
                                        batch_y_speech_acts,
                                        self.sequence_length:
                                        batch_seq_length,
                                        self.keep_prob:
                                        self.params["keep_prob_global_train"]
                                    })
                                loss_speech_acts = sess.run(
                                    self.cost_speech_acts,
                                    feed_dict={
                                        self.x:
                                        batch_x,
                                        self.y_speech_acts:
                                        batch_y_speech_acts,
                                        self.sequence_length:
                                        batch_seq_length,
                                        self.keep_prob:
                                        self.params["keep_prob_global_train"]
                                    })
                                print("[SPEECH ACTS] " + "Minibatch Loss= "
                                      "{:.4f}".format(loss_speech_acts) +
                                      ", Minibatch Accuracy= "
                                      "{:.4f}".format(acc_speech_acts))

                                print("")

                            # RESET BATCH
                            batch_x = []
                            batch_seq_length = []
                            batch_y_sentiment = []
                            batch_y_topics = []
                            batch_y_emotions = []
                            batch_y_speech_acts = []
            self.evaluate()

    def evaluate(self):
        evaluate = Evaluate(self.data, self.params, self.predictions,
                            self.sess, self.x, self.sequence_length,
                            self.keep_prob, self.y_sentiment, self.y_emotion,
                            self.y_topics, self.y_speech_acts)
        evaluate.execute_evaluation("TRAIN", "train.txt")
        evaluate.execute_evaluation("TEST", "test.txt")