Code example #1
File: 2net-shield.py Project: albinhubsch/lucy-light
def main():

    # Get the application data
    f = open(".access_token", "r")
    keydata = json.loads(f.read())

    # Initiate the moves object
    m = Moves(keydata)

    # Initiate the com link with arduino
    c = Comm()

    loops = 0

    # Run program loop
    while True:

        state = 0

        # Load date interval
        currentDate = datetime.datetime.now().strftime("%Y%m%d")
        oldDate = (datetime.datetime.now() - datetime.timedelta(days=30)).strftime("%Y%m%d")

        data = m.getRangeSummary(oldDate, currentDate)
        processor = DataProcessor(data)

        msg = processor.getDuration()

        print msg

        c.send(msg)

        # Sleep program until next check
        time.sleep(30)
Code example #2
    def get_graph(self, area=None, company=None):
        """
        Queries tweets by given area/company filter.
        """
        # Get all tweets or filtered by area
        if area:
            data = self.client.get_tweets_by_area(area)
            suffix = area.upper()
        elif company:
            data = self.client.get_tweets_by_company(company)
            suffix = company.upper()
        else:
            data = self.client.get_all_tweets()
            suffix = 'GLOBAL'

        processor = DataProcessor(data)

        # Get the time series data
        time_series = processor.prepare_time_series()

        # Save all the graph info in a list we can access from the view template
        graph = [
            dict(data=[dict(x=time_series.index, y=time_series)],
                 layout=dict(title='Tweet Frequency - ' + suffix),
                 id='timeseries')
        ]

        # Plotly needs the graph/pandas data encoded in compatible JSON format
        graph = json.dumps(graph, cls=plotly.utils.PlotlyJSONEncoder)

        return graph
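
The comment above notes that the view template consumes the encoded graph. As a hedged illustration only (the web framework is not part of this excerpt, so the Flask app, route name, template file and the DataPlotter class name below are assumptions), a minimal sketch of handing the JSON to a template could look like:

# Hypothetical wiring, not taken from the project: expose the Plotly-encoded graph to a template.
from flask import Flask, render_template

app = Flask(__name__)
plotter = DataPlotter()  # assumed owner of get_graph(); construction details are not shown in the excerpt

@app.route('/graphs')
def graphs():
    # get_graph() already returns a JSON string produced with PlotlyJSONEncoder
    return render_template('graphs.html', graph_json=plotter.get_graph())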
Code example #3
    def compute_scores(self, estimator):
        dp = DataProcessor()

        already_processed = False
        previous_commit = None
        all_scores = []

        reports = dp.read_and_process_report_data(self.path_to_reports_data, self.project)
        #print self.train_split_index_start, self.train_split_index_end

        reports_to_process = reports[self.train_split_index_start: self.train_split_index_end]
        pool = pp.ProcessPool(10) #don't have more than number of reports??
        self.cur_estimator = estimator

        all_scores = pool.map(self.get_report_score, reports_to_process)
        #pool.close()
        #pool.join()
        all_matrixes = [i[0] for i in all_scores]
        total_tried = sum([i[1] for i in all_scores])
        number_achieved = sum([i[2] for i in all_scores])

        print "finished pooling"
        print all_scores
        final_MAP_score = self.MAP(all_matrixes)
        final_MRR_score = self.MRR(all_matrixes)
        print final_MAP_score, " final MAP score"
        print final_MRR_score, " final MRR score"
        print float(number_achieved)/float(total_tried), " final accuracy at k score"
        return final_MAP_score
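
The MAP and MRR helpers called above are not shown in this excerpt. As a hedged sketch only, the standard Mean Reciprocal Rank over per-report ranked relevance lists looks like the following; the input format (one 0/1 relevance list per report) is an assumption and the project's own self.MRR may differ.

def mean_reciprocal_rank(relevance_lists):
    # relevance_lists: one ranked list of 0/1 relevance flags per bug report (assumed format)
    reciprocal_ranks = []
    for ranks in relevance_lists:
        rr = 0.0
        for position, is_relevant in enumerate(ranks, start=1):
            if is_relevant:
                rr = 1.0 / position  # reciprocal rank of the first relevant source file
                break
        reciprocal_ranks.append(rr)
    return sum(reciprocal_ranks) / len(reciprocal_ranks) if reciprocal_ranks else 0.0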
Code example #4
def InferenceData(trainer):
    images = ImageIO.ReadImagesFromFolder("../data/monster/himax_processed/",
                                          '.jpg', 0)
    [x_live, y_live] = DataProcessor.ProcessInferenceData(images, 60, 108)
    live_set = Dataset(x_live, y_live)
    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 0}
    live_generator = data.DataLoader(live_set, **params)

    y_pred_himax = trainer.Infer(live_generator)
    y_pred_himax = np.reshape(y_pred_himax, (-1, 4))
    h_images = images

    images = ImageIO.ReadImagesFromFolder("../data/monster/bebop_processed/",
                                          '.jpg', 0)
    [x_live, y_live] = DataProcessor.ProcessInferenceData(images, 60, 108)
    live_set = Dataset(x_live, y_live)
    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 0}
    live_generator = data.DataLoader(live_set, **params)

    y_pred_bebop = trainer.Infer(live_generator)
    y_pred_bebop = np.reshape(y_pred_bebop, (-1, 4))

    combinedImages = []
    for i in range(len(images)):
        img = ImageEffects.ConcatImages(images[i], h_images[i])
        combinedImages.append(img)

    VizDroneBEV(combinedImages, y_pred_bebop, y_pred_himax)
Code example #5
File: ETH.py Project: abhishekmadhu/FrontNetPorting
def LoadData(args):

    if args.gray is not None:
        [x_train, x_validation, y_train, y_validation
         ] = DataProcessor.ProcessTrainData(args.load_trainset, 60, 108, True)
        [x_test,
         y_test] = DataProcessor.ProcessTestData(args.load_testset, 60, 108,
                                                 True)
    else:
        [x_train, x_validation, y_train, y_validation
         ] = DataProcessor.ProcessTrainData(args.load_trainset, 60, 108)
        [x_test,
         y_test] = DataProcessor.ProcessTestData(args.load_testset, 60, 108)

    training_set = Dataset(x_train, y_train, True)
    validation_set = Dataset(x_validation, y_validation)
    test_set = Dataset(x_test, y_test)

    # Parameters
    num_workers = 6
    params = {
        'batch_size': args.batch_size,
        'shuffle': True,
        'num_workers': num_workers
    }
    train_loader = data.DataLoader(training_set, **params)
    validation_loader = data.DataLoader(validation_set, **params)
    params = {
        'batch_size': args.batch_size,
        'shuffle': False,
        'num_workers': num_workers
    }
    test_loader = data.DataLoader(test_set, **params)

    return train_loader, validation_loader, test_loader
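
LoadData expects an argparse-style namespace. A minimal sketch of building one is shown below; the flag names simply mirror the attributes used above (load_trainset, load_testset, batch_size, gray), so the project's real CLI is an assumption.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--load_trainset', required=True)      # path to the training pickle
parser.add_argument('--load_testset', required=True)       # path to the test pickle
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--gray', default=None)                # any non-None value selects grayscale processing
args = parser.parse_args()

train_loader, validation_loader, test_loader = LoadData(args)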
Code example #6
def test_reading_in():
    dp = DataProcessor()

    dp.get_stackoverflow_data("/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/birt/")
    dp.get_stackoverflow_data("/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse/")
    dp.get_stackoverflow_data("/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse-jdt/")
    dp.get_stackoverflow_data("/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/swt/")
Code example #7
def LoadData(args):
    [x_train, x_validation, y_train,
     y_validation] = DataProcessor.ProcessTrainData(args.load_trainset)
    [x_test, y_test] = DataProcessor.ProcessTestData(args.load_testset)

    training_set = Dataset(x_train, y_train, True)
    validation_set = Dataset(x_validation, y_validation)
    test_set = Dataset(x_test, y_test)

    # Parameters
    # num_workers - 0 for debug in Mac+PyCharm, 6 for everything else
    num_workers = 0
    params = {
        'batch_size': args.batch_size,
        'shuffle': True,
        'num_workers': num_workers
    }
    train_loader = data.DataLoader(training_set, **params)
    validation_loader = data.DataLoader(validation_set, **params)
    params = {
        'batch_size': args.batch_size,
        'shuffle': False,
        'num_workers': num_workers
    }
    test_loader = data.DataLoader(test_set, **params)

    return train_loader, validation_loader, test_loader
Code example #8
class StockAnalysisServer:
    def __init__(self):
        self.data_reader = DataReader()
        self.data_processor = DataProcessor()

    def get_expected_stock_return_probablity(self,
                                             ticker_name,
                                             time_horizon,
                                             base_amount,
                                             finalAmount,
                                             historic_data_years=1):
        '''
        This method computes the probability of the expected return
        :param ticker_name: The ticker name the user wishes to analyze
        :param time_horizon: The time horizon the user wishes to invest for
        :param base_amount: The base amount the user wishes to start investing with
        :param finalAmount: The final amount the user wishes to achieve
        :param historic_data_years: The number of years of history used for the base-value calculation
        :return: The probability of the user achieving the target amount
        '''
        from_date = datetime.date.today() - relativedelta(
            years=historic_data_years)
        ticker_data = self.data_reader.fetch_ticker_data(
            ticker_name, from_date)  # '1/1/2017'
        base_rate, volatility = self.data_processor.calc_base_rate_and_volatility_by_ticker(
            ticker_data)
        probablity_expected_return = self.data_processor.calc_expected_return_probablity_based_on_monte_carlo(
            base_rate, volatility, time_horizon, base_amount, finalAmount)
        return probablity_expected_return
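
calc_expected_return_probablity_based_on_monte_carlo is not part of this excerpt. As a hedged sketch only, one common way to estimate such a probability is to simulate many compounded return paths from the historical base rate and volatility and count how often the base amount grows past the target; the actual DataProcessor logic may differ.

import numpy as np

def expected_return_probability(base_rate, volatility, years, base_amount, target_amount, n_paths=10000):
    # Draw yearly returns from N(base_rate, volatility) -- a simplifying assumption, not the project's model.
    rng = np.random.default_rng()
    yearly_returns = rng.normal(base_rate, volatility, size=(n_paths, years))
    final_amounts = base_amount * np.prod(1.0 + yearly_returns, axis=1)
    return float(np.mean(final_amounts >= target_amount))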
Code example #9
def process_files_eclipse():
    dp = DataProcessor()
    path_to_reports_data = "/home/ndg/users/carmst16/EmbeddingBugs/resources/bugreport/Eclipse_Platform_UI.xlsx"
    path_to_starter_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.ui/"
    path_to_processed_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.ui_processed_split/"
    path_to_temp = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.ui_temp/"
    reports = dp.read_and_process_report_data(path_to_reports_data, "eclipse_platform_ui")
    dp.process_all_files(path_to_starter_repo, reports, path_to_processed_repo, path_to_temp)
Code example #10
def process_files_birt():
    dp = DataProcessor()
    path_to_reports_data = "/home/ndg/users/carmst16/EmbeddingBugs/resources/bugreport/Birt.xlsx"
    path_to_starter_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/birt/"
    path_to_processed_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/birt_processed_split/"
    path_to_temp = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/birt_temp/"
    reports = dp.read_and_process_report_data(path_to_reports_data, "birt")
    dp.process_all_files(path_to_starter_repo, reports, path_to_processed_repo, path_to_temp)
Code example #11
def process_files_swt():
    dp = DataProcessor()
    path_to_reports_data = "/home/ndg/users/carmst16/EmbeddingBugs/resources/bugreport/SWT.xlsx"
    path_to_starter_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.swt/"
    path_to_processed_repo = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.swt_processed_split_text_trial/"
    path_to_temp = "/home/ndg/users/carmst16/EmbeddingBugs/resources/source_files/test/eclipse.platform.swt_temp_again/"
    reports = dp.read_and_process_report_data(path_to_reports_data, "swt")
    dp.process_all_files(path_to_starter_repo, reports, path_to_processed_repo, path_to_temp)
Code example #12
 def __init__(self, model_path=None):
     self.config = DataConfig()
     self.dp = DataProcessor(self.config)
     self.num_channels = self.config.num_channels
     self.row = self.config.img_height
     self.col = self.config.img_width
     self.ch = self.config.num_channels
     self.model = self.load_model(model_path)
Code example #13
File: FileStreamer.py Project: longedok/Zalgo
 def __init__(self, path, part_size, stream_id):
     self.__part_size = part_size
     dp = DataProcessor()
     fl = FileLoader()
     data = fl.load_file(path)
     self.__size = len(data)
     self.__chunks = dp.split_data(data, part_size)
     debug('FileStreamer.init(): len(self.__chunks) == %d' % len(self.__chunks))
     self.__stream_id = stream_id
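
dp.split_data is not shown in this excerpt. A minimal sketch of the fixed-size chunking it appears to perform is given below; the real method may behave differently.

def split_data(data, part_size):
    # Split a bytes or str payload into consecutive chunks of at most part_size.
    return [data[i:i + part_size] for i in range(0, len(data), part_size)]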
Code example #14
File: Scheduler.py Project: jtcass01/Pytter
class Scheduler(object):
    """description of class"""
    def __init__(self):
        self.schedule = DataProcessor(
            "C:/Users/JakeT/OneDrive/documents/visual studio 2017/Projects/Pytter/Pytter/database/tweet_schedule.csv"
        )

    def schedule_tweet(self, tweet):
        #        print(self.schedule, "schedule before")
        self.schedule.add_row(tweet.to_list())
        #        print(self.schedule, "schedule after")
        pass
Code example #15
File: net-shield.py Project: albinhubsch/lucy-light
def main():

	# Get the application data
	f = open('.access_token', 'r')
	keydata = json.loads(f.read())

	# Initiate the moves object
	m = Moves(keydata)
	
	# Initiate the com link with arduino
	c = Comm()

	loops = 0

	# Run program loop
	while True:

		state = 0

		if loops == 0:
			# Load date interval
			currentDate = datetime.datetime.now().strftime('%Y%m%d')
			oldDate = (datetime.datetime.now() - datetime.timedelta(days=30)).strftime('%Y%m%d')

			data = m.getRangeSummary(oldDate, currentDate)
			processor = DataProcessor(data)

			raw = processor.newDataProcessor()

			if processor.checkMoving():
				state = 1


		# Check realtime
		realtime = datetime.datetime.strptime(requests.get('http://studier.albinhubsch.se/lucy-light').text, "%Y-%m-%d %H:%M:%S")
		now = datetime.datetime.now()

		if realtime + datetime.timedelta(minutes=10) > now:
			state = 1


		msg = str(state) + ',' + raw

		c.send(msg)

		if loops < 10:
			loops += 1
		else:
			loops = 0

		# Sleep program until next check
		time.sleep(6)
Code example #16
def delta_learning_rule(data, a, lr, co):
    count = 0
    npa = np.array(a)
    co_copy = deepcopy(co)
    dp.plot_graph(npa, co_copy)
    for i in range(len(data)):
        data['input'][i].insert(0, 1)
    while count < 4:
        for row in data.itertuples():
            y = step_function(npa.dot(np.array([row[1]]).T))
            npa = npa + lr * (row[2] - y) * np.array(row[1])
            dp.plot_graph(npa, co_copy)
        count += 1
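
step_function and dp.plot_graph are defined elsewhere. For orientation only, a minimal sketch of a threshold activation compatible with the update rule above is shown below; whether the original returns 0/1 or ±1 depends on the label encoding, which this excerpt does not show.

import numpy as np

def step_function(net_input):
    # Bipolar threshold: 1 for a non-negative net input, -1 otherwise (the encoding is an assumption).
    return 1 if np.all(net_input >= 0) else -1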
Code example #17
def spl(data, a, lr, co):
    count = 0
    npa = np.array(a)
    co_copy = deepcopy(co)
    dp.plot_graph(npa, co_copy)
    for i in range(len(data)):
        data['input'][i].insert(0, 1)
    while count < 4:
        for row in data.itertuples():
            gx = npa.dot(np.array([row[1]]).T)
            if row[2] > 0 and gx[0] < 0 or row[2] < 0 and gx[0] > 0:
                npa = npa + lr * row[2] * np.array(row[1])
                dp.plot_graph(npa, co_copy)
        count += 1
Code example #18
class Profile(object):
    """TODO: ADD CLASS DESCRIPTION"""
    def __init__(self):
        self.processor = DataProcessor(file_path=USERPATH)

    def authenticate(self):
        consumer_key = list(self.processor.get_column_np("consumer_key"))[0]
        consumer_secret = list(
            self.processor.get_column_np("consumer_secret"))[0]
        access_token = list(self.processor.get_column_np("access_token"))[0]
        access_token_secret = list(
            self.processor.get_column_np("access_token_secret"))[0]

        return twitter.OAuth(access_token, access_token_secret, consumer_key,
                             consumer_secret)
Code example #19
File: model.py Project: ztypl/MLexp
 def __init__(self, filename, embedding_method='deepwalk', **kwargs):
     self.dp = DataProcessor(filename)
     self.workers = cpu_count()
     self.embedding_model = None
     self.embedding_method = embedding_method
     print("Init over.")
     sys.stdout.flush()
     if embedding_method == 'deepwalk':
         self.deepwalk(**kwargs)
     elif embedding_method == 'grarep':
         self.grarep(**kwargs)
     elif embedding_method == "node2vec":
         self.node2vec(**kwargs)
     else:
         raise TypeError("Unsupported type %s" % embedding_method)
Code example #20
File: lstm.py Project: TheIannaman/iot_eCare_ai
def getDataFromDB():
    """ Funzione per ottenere i dati dal server locale influxdb contenente le misurazioni dei pazienti. """

    ipDB = os.getenv('INFLUX_IP_AI', 'localhost')
    portDB = os.getenv('INFLUX_PORT_AI', '8086')
    userDB = os.getenv('INFLUX_USER_AI', 'admin')
    passwordDB = os.getenv('INFLUX_PW_AI', 'G10m1R0m3')
    nameDB = os.getenv('INFLUX_DB_AI', 'giomi')

    dr = DataRetriever(metrics)
    dfs = dr.loadDataFromDB(ipDB, portDB, userDB, passwordDB, nameDB)

    dp = DataProcessor(metrics, defaults)
    df = dp.applyPipeline(dfs)

    return df
Code example #21
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename="log.txt",
                        filemode='w')

    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    model = FrontNet(PreActBlock, [1, 1, 1])
    ModelManager.Read('Models/FrontNetNicky.pt', model)

    DATA_PATH = "/Users/usi/PycharmProjects/data/"

    [x_test, y_test,
     z_test] = DataProcessor.ProcessTestData(DATA_PATH + "TestNicky.pickle",
                                             60, 108)
    #x_test = x_test
    #y_test = y_test
    test_set = Dataset(x_test, y_test)
    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 0}
    test_generator = data.DataLoader(test_set, **params)
    trainer = ModelTrainer(model)

    valid_loss_x, valid_loss_y, valid_loss_z, valid_loss_phi, outputs, gt_labels = trainer.ValidateSingleEpoch(
        test_generator)

    VizDroneBEV(x_test, y_test, z_test, outputs)
Code example #22
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename="log.txt",
                        filemode='w')

    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    DATA_PATH = "/Users/usi/PycharmProjects/data/160x90/"
    name = "160x90HimaxMixedTrain_12_03_20.pickle"

    [x_test, y_test,
     z_test] = DataProcessor.ProcessTestData(DATA_PATH + name, True)
    h = x_test.shape[2]
    w = x_test.shape[3]
    x_test = np.reshape(x_test, (-1, h, w))

    if name.find(".pickle") != -1:
        name = name.replace(".pickle", '')

    VizWorldTopView(x_test, y_test, z_test, True, name)
Code example #23
def main():

    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename="log.txt",
                        filemode='w')

    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    DATA_PATH = "/Users/usi/PycharmProjects/data/"
    [x_test,
     y_test] = DataProcessor.ProcessTestData(DATA_PATH + "patterns.pickle", 60,
                                             108, True)
    #x_test = x_test[:500]
    #y_test = y_test[:500]
    test_set = Dataset(x_test, y_test)
    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 0}
    test_generator = data.DataLoader(test_set, **params)

    Viz4PoseVariables(x_test, y_test)
Code example #24
class MyListener(StreamListener):

    def __init__(self):
        self.dp = DataProcessor()
        self.conf = Config()
        self.auth = tweepy.OAuthHandler(self.conf.consumer_key, self.conf.consumer_secret)
        self.auth.set_access_token(self.conf.access_token, self.conf.access_token_secret)
        self.conn = Connector()

    def on_data(self, raw_data):
        try:
            self.conn.insert_raw_twitter(raw_data, self.conn.twitterdb_demo_pub)

            target_info = self.dp.get_geo_twi_target_info(raw_data)

            self.conn.insert_raw_twitter_result(target_info, self.conn.twitterdb_demo_results_pub)

            if target_info:
                self.conn.insert_raw_twitter_result(target_info, self.conn.twitterdb_demo_results_pub)

            return True

        except BaseException as e:
            print("Error on_data:%s" % str(e))
        return True

    def on_error(self, status_code):
        if status_code == 420:
            print("ERROR: Rate limit reached")
        print(status_code)
        return True

    def on_timeout(self):
        print("ERROR: Timeout...")
        return True  # Don't kill the stream
Code example #25
    def train(self):

        #this describes everything you want to search over
        parameters = {'size': [100,  500],
                      'window': [5, 10],
                      'sg': [1],
                      'workers': [16],
                      'hs': [0],
                      'negative': [25],
                      'iter': [1]
                      }

        dp = DataProcessor()
        data = dp.get_stackoverflow_data_sentences_all(["/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/swt/", "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/birt/", "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse/", "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse-jdt/"])
        #if self.document:
         #   data = dp.get_stackoverflow_data_document(self.path_to_stackoverflow_data)
        #else:
          #  data = dp.get_stackoverflow_data_sentences(self.path_to_stackoverflow_data)
        w2v = W2VTransformer()
        # see: https://stackoverflow.com/questions/44636370/scikit-learn-gridsearchcv-without-cross-validation-unsupervised-learning/44682305#44682305
        #clf = GridSearchCV(w2v, parameters, scoring={"MPP": self.call_MRR, "MAP": self.call_MAP}, verbose=2, n_jobs=3, refit="MAP", cv=[(slice(None), slice(None))])

        #current implementation version only uses MAP to score
        #cv=[(slice(None), slice(None))]
        #clf = GridSearchCV(w2v, parameters, scoring= self.compute_scores, verbose=2)
        cur_max = 0
        best_model = None
        parameters["size"] = [100]
        parameters["window"] = [10]
        for s in parameters["size"]:
            for w in parameters["window"]:
                print len(data)
                print "training model"
                model = gensim.models.Word2Vec(sentences=data, sg=1, size=s, window=w, workers=16, hs=0, negative=25, iter=5)
                print "model trained"
                print parameters
                score = self.compute_scores(model)
                if score > cur_max:
                    cur_max = score
                    best_model = model
        print cur_max
        word_vectors = best_model.wv
        print "VOCAB_SIZE", len(model.wv.vocab)
        word_vectors.save("best_model")
Code example #26
 def __init__(self):
     ops.reset_default_graph()
     self.sess = tf.InteractiveSession()
     self.dp = DataProcessor(DataConfig())
     self.config = self.dp.config
     self.row = self.config.img_height
     self.col = self.config.img_width
     self.ch = self.config.num_channels
     self.batch_count = 0
     self.create_nvidia_model()
     self.create_train_method()
     self.epoch_count = 0
     self.step_count = 0
     self.loss_val = 1
     self.saver = tf.train.Saver()
     if self.config.model_continue:
         self.restore_sess()
     else:
         self.sess.run(tf.global_variables_initializer())
Code example #27
    def get_model_coverage(self):

        parameters = {'size': [100,  500],
                      'window': [5, 10],
                      'sg': [1],
                      'workers': [16],
                      'hs': [0],
                      'negative': [25],
                      'iter': [1]
                      }

        dp = DataProcessor()
        data = dp.get_stackoverflow_data_sentences_all(["/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/swt/", "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/birt/", "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse/", "/home/ndg/users/carmst16/EmbeddingBugs/resources/stackexchangedata/eclipse-jdt/"])

        model = gensim.models.Word2Vec(sentences=data, sg=1, size=100, window=10, workers=16, hs=0, negative=25, iter=1)
        vocab = model.wv.vocab
        print "VOCAB_SIZE", len(vocab)

        reports = dp.read_and_process_report_data(self.path_to_reports_data, self.project)
        all_report_text = []
        all_source_file_text = []
        for report in reports:
            report_text = report.processed_description
            file_path = self.path_to_processed_repo + str(report.reportID) + "/"
            all_report_text.extend(report_text)

            for dir_, _, files in os.walk(file_path):
                for fileName in files:
                    relDir = os.path.relpath(dir_, file_path)
                    relFile = os.path.join(relDir, fileName)
                    full_path = file_path + relFile
                    with open(full_path, 'r') as content_file:
                        content = content_file.readlines()
                        for line in content:
                            l = line.strip().split(",")
                            all_source_file_text.extend(l)

        all_report_vocab = set(all_report_text)
        all_source_file_vocab = set(all_source_file_text)

        print "report coverage", len(set.intersection(all_report_vocab, vocab))/ float(len(all_report_vocab))
        print "source file coverage", len(set.intersection(all_source_file_vocab, vocab))/ float(len(all_source_file_vocab))
Code example #28
def Selector1():
    selector = Selector()
    dataProcessor = DataProcessor()
    #abil = selector.getAbilityWithId(3)
    passed = selector.runSelector()
    print(passed)
    selector.setCharacterStats(15, 15)
    passed = selector.runSelector()
    print(passed)
    print(selector.HP)
    print(selector.getAuraWithId(selector.getAbilityWithId(2)['auraOne']))
Code example #29
File: ETL.py Project: jlgjunior/gps_etl
def main():
    reader = DataReader('dataSrc')

    data = reader.readCoordinates()

    processor = DataProcessor(data)
    locations = processor.processDataPoints()
    try:
        for location in locations:
            location.state.country.addNew()
            location.state.country_id = location.state.country.id
            #location.state.country = None
            location.state.addNew()
            location.state_id = location.state.id
            #location.state = None
            location.addNew()
    except Exception as e:
        print(e)

    print(Location.listAll())
Code example #30
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename="log.txt",
                        filemode='w')

    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)


    DATA_PATH = "/Users/usi/PycharmProjects/data/160x90/"

    # Get baseline results

    picklename = "160x90HimaxMixedTest_12_03_20.pickle"
    [x_test, y_test] = DataProcessor.ProcessTestData(DATA_PATH + picklename)
    test_set = Dataset(x_test, y_test)
    params = {'batch_size': 1, 'shuffle': False, 'num_workers': 1}
    test_generator = data.DataLoader(test_set, **params)
    model = Dronet(PreActBlock, [1, 1, 1], True)
    ModelManager.Read('../PyTorch/Models/DronetHimax160x90AugCrop.pt', model)
    trainer = ModelTrainer(model)
    MSE2, MAE2, r2_score2, outputs2, gt_labels2 = trainer.Test(test_generator)

    # Get pitch values

    picklename = "160x90HimaxMixedTest_12_03_20Rot.pickle"
    r_test = DataProcessor.GetRollFromTestData(DATA_PATH + picklename)

    print(r_test)


    if picklename.find(".pickle") != -1:
        picklename = picklename.replace(".pickle", '')


    Plot2Models(r_test, picklename, r2_score2)
Code example #31
def roberta_pair_task(config):

    tokenizer = BertTokenizer.from_pretrained(config.tokenizer_file,
                                              do_lower_case=config.do_lower_case)
    processor = DataProcessor(config)
    config.class_list = processor.get_labels()
    config.num_labels = len(config.class_list)

    train_examples = processor.get_train_examples()
    dev_examples = processor.get_dev_examples()
    augment_examples = processor.read_data_augment(config.data_augment_method)

    cur_model = MODEL_CLASSES[config.use_model]
    model = cur_model(config)

    logging.info("self config %s", config_to_json_string(config))

    model_example, dev_evaluate, predict_label = cross_validation(
        config=config,
        model=model,
        tokenizer=tokenizer,
        train_examples=train_examples,
        dev_examples=dev_examples,
        pattern=config.pattern,
        train_enhancement=augment_examples if config.data_augment else None,
        test_examples=None)
    logging.info("dev_evaluate: {}".format(dev_evaluate))

    if config.pattern == 'full_train':
        model_save(config, model_example)

    return dev_evaluate
Code example #32
File: DataPlotter.py Project: Hackthings/energy_maps
    def get_graph(self, area=None, company=None):
        """
        Queries tweets by given area/company filter.
        """
        # Get all tweets or filtered by area
        if area:
            data = self.client.get_tweets_by_area(area)
            suffix = area.upper()
        elif company:
            data = self.client.get_tweets_by_company(company)
            suffix = company.upper()
        else:
            data = self.client.get_all_tweets()
            suffix = 'GLOBAL'

        processor = DataProcessor(data)

        # Get the time series data
        time_series = processor.prepare_time_series()

        # Save all the graph info in a list we can access from the view template
        graph = [
            dict(
                data=[
                    dict(
                        x=time_series.index,
                        y=time_series
                    )
                ],
                layout=dict(
                    title='Tweet Frequency - ' + suffix
                ),
                id = 'timeseries'
            )
        ]

        # Plotly needs the graph/pandas data encoded in compatible JSON format
        graph = json.dumps(graph, cls=plotly.utils.PlotlyJSONEncoder)

        return graph
Code example #33
def Test():
    model = Dronet(PreActBlock, [1, 1, 1], True)
    ModelManager.Read("Models/DronetHimax160x90.pt", model)

    trainer = ModelTrainer(model)

    #ModelManager.Read("Models/FrontNetGray.pt", model)
    [x_test, y_test] = DataProcessor.ProcessTestData(
        "/Users/usi/PycharmProjects/data/160x90HimaxStatic_12_03_20.pickle")
    test_set = Dataset(x_test, y_test)

    params = {'batch_size': 64, 'shuffle': False, 'num_workers': 1}
    test_loader = data.DataLoader(test_set, **params)
    trainer.Predict(test_loader)
Code example #34
    def __init__(self):
        self.n_clusters = 2

        self.algorithms = {
            'current': STRPAlgorithm(self.n_clusters),
            'future': STRPAlgorithm(self.n_clusters)
        }

        self.data_processors = {
            'current': DataProcessor(),
            'future': DataProcessor()
        }

        self.max_absolute_treshold = 13
        self.min_absolute_treshold = 5
        self.max_percentual_treshold = .1
        self.min_percentual_treshold = .02

        self.entity_temper_percentual_threshold = .2

        self.is_running = False

        self.container = list()
        self.processed_nodes = list()
        self.raw_data = list()

        self.client = udp_client.UDPClient(OSC_SERVER, 8000)

        self.last_iteration = datetime.now()
        print('Application initialised')
        self.is_running = True

        # Create dummy data
        for i, d in enumerate(start_data):
            transformed_data = self.data_processors[
                'current'].transform_input_data(d)
            self.processed_nodes.append(transformed_data)
Code example #35
def main():

    data_path = "../data/training-Obama-Romney-tweets.xlsx"
    test_data_path = ''
    # test_data_path = '../data/testing-Obama-Romney-tweets-3labels.xlsx'

    try:
        opts, args = getopt.getopt(sys.argv[1:], "d:t:")
        for o, a in opts:
            if o == '-d':
                data_path = a
            elif o == '-t':
                test_data_path = a

    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)
        print 'read the readme file to know how to run this project'
        sys.exit(2)


    dp = DataProcessor(data_path)
    tc = TweetClassifier()

    if test_data_path != '':

        dpt = DataProcessor(test_data_path)

        print '\n****** OBAMA ******\n'
        data = dp.load_excel_data('Obama')
        data_test = dpt.load_excel_data('Obama')
        report = tc.train_test(data, data_test)
        DataProcessor.print_report(report)

        print '\n****** ROMNEY ******\n'
        data = dp.load_excel_data('Romney')
        data_test = dpt.load_excel_data('Romney')
        report = tc.train_test(data, data_test)
        DataProcessor.print_report(report)

    else:
        print '\n****** OBAMA ******\n'
        data = dp.load_excel_data('Obama')
        report = tc.crossvalidate(data, 10)
        DataProcessor.print_report(report)

        print '\n****** ROMNEY ******\n'
        data = dp.load_excel_data('Romney')
        report = tc.crossvalidate(data, 10)
        DataProcessor.print_report(report)
Code example #36
File: MSGSP.py Project: ragib06/pymsgsp
    data_file = "data/dataS.txt"
    para_file = "data/paraS.txt"
    result_file = "data/resultS.txt"

    if len(sys.argv) > 1:
        if sys.argv[1] == '-d':
            if len(sys.argv) < 4:
                logging.error("Not enough arguments !!")
                sys.exit()
            else:
                data_file = sys.argv[2]
                para_file = sys.argv[3]
                result_file = sys.argv[4]


    DP = DataProcessor(data_file, para_file, result_file, False)
    inputData = DP.loadInput()

    startTime = datetime.now()
    print 'Execution started at:', startTime
    algo = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"], logging.INFO)
    # algo = BruteForceSPM(inputData["T"], inputData["MS"], inputData["SDC"])

    outputData = algo.run()
    print 'Execution time:', datetime.now() - startTime

    outputDict = defaultdict(list)

    for seq in outputData:
        count = 0
        for d in inputData["T"]: