def test_filter_info_right_data_extra_column(self):
    data = [
        'RecordId,EmployID,Name,Age,Year,Salary,Type',
        '10021,1,Rob,23,2008,65580,Sport,ExtraData'
    ]
    test_processor = DataProcessor()
    self.assertEqual({}, test_processor.filter_info(data))
def get_stations(self, station_id=None):
    """
    Gets all station data from the API and inserts it into the database.

    :param station_id: int
    :return: Union[list, tuple]
    """
    url = self.prepare_url(self.URL_STATIONS)
    if self.request_is_valid(url):
        try:
            raw_data = self.call_api(url)
            cities = DataProcessor.parse_cities(raw_data)
            stations = DataProcessor.parse_stations(raw_data)
            self.db.insert_from_list("cities", cities, True)
            self.db.insert_from_list("stations", stations, True)
        except (ApiError, DataProcessingError, DbManagerError) as e:
            raise DataManagerError("Could not handle API data properly") from e
    try:
        stations = self.db.get_all_station_data(station_id)
    except DbManagerError as e:
        logger.exception(e)
        raise DataManagerError("Error when obtaining data from database!") from e
    if station_id:
        return stations[0]
    return stations
def create_connection(self, wb, switch):
    sheet = self.choose_sheet(wb)
    target_column = 2
    target_row = 1
    max_column = sheet.max_column
    max_row = sheet.max_row
    data_row = []
    row_dict = {}
    keys = []
    data_to_process = {}
    dup_keys = 0
    # Initialise an empty value for every known row name
    for row_name in self.row_names:
        row_dict[row_name] = ''
    for row in range(0, max_row):
        # Get the first value from the row to set as the key
        output = sheet.cell(row=target_row, column=1).value
        key = dp.validate_key(str(output))
        # Check if it's a duplicate key
        if key in keys:
            dup_keys += 1
            data_to_log = "Duplicate Key" + str(key)
            Lfh.append_file('log.txt', data_to_log)
        # Add that key to the list of all keys
        keys.append(key)
        data_to_process[key] = {}
        col_num = 0
        for column in range(0, max_column):
            output = sheet.cell(row=target_row, column=target_column).value
            data_row.append(str(output))
            row_dict[self.row_names[col_num]] = data_row[col_num]
            target_column = target_column + 1
            col_num = col_num + 1
        # Skip the ID and Valid rows
        for row_name in self.row_names[1:-1]:
            data_to_process[key][row_name] = row_dict[row_name]
        data_to_process[key]['valid'] = "0"
        data_row = []
        target_column = 1
        target_row = target_row + 1
    # Send the data to be processed
    dict_valid = dp.send_to_validate(data_to_process, switch, dup_keys)
    return dict_valid
def fetch_text_contents(file, switch, separator=","):
    f = FileReader()
    dup_keys = 0
    keep_going = True
    data_fields = DataFields.get_data_fields(DataFields)
    if file != "":
        # Repeat for each line in the text file
        for line in file:
            # Split the line into fields using the separator
            fields = line.split(separator)
            checked_id = DataProcessor.validate_key(fields[0])
            if checked_id in f.dict_root:
                dup_keys += 1
                fields[6] = fields[6].rstrip()
                data_to_log = "Duplicate Key" + str(fields[0:])
                LogFileHandler.append_file('log.txt', data_to_log)
            else:
                test_dict = {}
                field_number = 1
                # Ignore the ID field and the Valid field for now
                for row_name in data_fields[1:-1]:
                    test_dict[row_name] = fields[field_number]
                    field_number += 1
                test_dict['valid'] = '0'
                f.dict_root.update({checked_id: test_dict})
        # Close the file to free up resources
        file.close()
    if keep_going:
        valid_dict = DataProcessor.send_to_validate(f.dict_root, switch, dup_keys)
        return valid_dict
def test_parse_methods(self):
    data = {
        "Attribute keywords": ["have ", "must have ", "has "],
        "Method keywords": ["can ", "should "],
        "Initialization keywords": {
            "Attribute keywords": ["is initializing by setting ", "by default get "],
            "Attribute values keywords": [" as ", " equal to ", " = "]
        }
    }
    description_config = DescriptionConfig(data)
    test_data_processor = DataProcessor()
    test_attributes = ["apples", "oranges"]
    test_line1 = "Client can swim"
    test_line2 = "can give apples, take apples and eat oranges"
    test_output1 = [{"Method": "swim", "Attributes": []}]
    test_output2 = [
        {"Method": "give apples", "Attributes": ["apples"]},
        {"Method": "take apples", "Attributes": ["apples"]},
        {"Method": "eat oranges", "Attributes": ["oranges"]}
    ]
    self.assertEqual(
        test_data_processor.parse_methods(test_line1, test_attributes,
                                          description_config), test_output1)
    self.assertEqual(
        test_data_processor.parse_methods(test_line2, test_attributes,
                                          description_config), test_output2)
def test_parse_initialization(self):
    data = {
        "Attribute keywords": ["have ", "must have ", "has "],
        "Method keywords": ["can ", "should "],
        "Initialization keywords": {
            "Attribute keywords": ["is initializing by setting ", "by default get "],
            "Attribute values keywords": [" as ", " equal to ", " = "]
        }
    }
    description_config = DescriptionConfig(data)
    test_data_processor = DataProcessor()
    test_attributes = ["apples", "oranges"]
    test_line1 = "Client is initializing by setting apples as 1, oranges as ten"
    test_line2 = "is initializing by setting apples = 1 and oranges = 5"
    test_output1 = (["apples", "oranges"], [1, "ten"])
    test_output2 = (["apples", "oranges"], [1, 5])
    self.assertEqual(
        test_data_processor.parse_initialization(test_line1, test_attributes,
                                                 description_config), test_output1)
    self.assertEqual(
        test_data_processor.parse_initialization(test_line2, test_attributes,
                                                 description_config), test_output2)
def get_contents(self):
    """
    Prepare the resume contents, composed of a header, spacing and the
    content table, all with table formatting.
    """
    self.elements = []
    data_processor = DataProcessor(self.resume_data)

    # resume header
    header_contents = self._set_table(data_processor.header_data(),
                                      styles.header_col_widths,
                                      styles.header_table_styles)
    self.elements.append(header_contents)

    # spaces
    spaces = Spacer(width=0, height=styles.header_space_height)
    self.elements.append(spaces)

    # resume contents
    table_contents = self._set_table(data_processor.content_data(),
                                     styles.content_col_widths,
                                     styles.content_table_styles)
    self.elements.append(table_contents)
    return self.elements
def test_clean_strings(self):
    test_data_processor = DataProcessor()
    test_input_string = ("***Smth***\n\n**Smth more**\n \n"
                         "1. How are you? Are you here?!\n\n"
                         "2. Hi, I'm Alex. And you?")
    test_output_string = "How are you Are you here\nHi, Im Alex And you\n"
    data_processor_output = test_data_processor.clean_strings(test_input_string)
    self.assertEqual(data_processor_output, test_output_string)
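# A hypothetical implementation of clean_strings, reverse-engineered from the
# expectations in test_clean_strings above; this is a sketch, not the project's
# actual DataProcessor method.
import re

def clean_strings(text):
    lines = []
    for line in text.split("\n"):
        line = line.strip()
        # Drop blank lines and markdown-emphasis-only lines such as "***Smth***"
        if not line or line.startswith("*"):
            continue
        # Strip list numbering like "1. ", then drop punctuation except commas
        line = re.sub(r"^\d+\.\s*", "", line)
        line = re.sub(r"[^\w\s,]", "", line)
        lines.append(line)
    # Join surviving lines, keeping a trailing newline
    return "".join(line + "\n" for line in lines)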
def estimate(args):
    input_model_file_name = args.input_model_file_name
    input_exe_file_name = args.input_feature_file_name
    extraction_method = args.extraction_method
    label_type = args.label_type

    # generate an .asm file from the executable file
    generator = IDAAsmGenerator()
    generator.generate(input_exe_file_name)

    # append the information of the input file to the database
    datproc = DataProcessor()
    datproc.update_database_from_file()

    # extract the feature vector from the generated .asm file
    asm_file_name = os.path.splitext(input_exe_file_name)[0] + '.asm'
    feature_vector = datproc.extract_data_from_file(asm_file_name,
                                                    extraction_method,
                                                    label_type)

    # load the classification model
    estimator = CompilerEstimator()
    estimator.load_model(input_model_file_name)

    # estimate
    result = estimator.estimate(feature_vector)
    print(result)
def eval(self, test_file_path, model_path):
    """
    Evaluate the model's performance on test data.

    Args:
        test_file_path: path to the jsonl file containing test data
        model_path: path to the model to be restored

    Returns:
        float number indicating test accuracy
    """
    saver = tf.train.Saver(max_to_keep=500)
    saver.restore(self.sess, model_path)
    print("Model restored from " + str(model_path))
    self.dp_test = DataProcessor(input_file_path=test_file_path)
    accuracies = []
    for data in self.dp_test.get_single_data():
        test_feed = {
            self.a: data["sentence1"],
            self.b: data["sentence2"],
            self.labels: data["gold_label"]
        }
        accuracy, predictions = self.sess.run([self.accuracy, self.h_output],
                                              feed_dict=test_feed)
        print("predictions for the batch: {}".format(predictions))
        print("Actual gold labels: {}".format(data["gold_label"]))
        accuracies.append(accuracy)
    test_acc = sum(accuracies) / len(accuracies)
    print("Overall test accuracy is {}".format(test_acc))
    return test_acc
def main(args):
    df = pd.read_csv(args.dataset)
    # df = df.iloc[::24, :]

    # Preprocess input and reshape to (num_samples, window_size, 1)
    processor = DataProcessor(window_size=args.window_size,
                              forecast_size=args.forecast,
                              shift=args.shift)
    train_X, train_y, test_X, test_y, raw_series = processor.preprocess(df)

    # train or load model
    lstm = LSTMModel(args.window_size, args.forecast)
    print(lstm.model.summary())
    if not args.eval_only:
        lstm.fit(train_X, train_y, epochs=args.epochs)
        lstm.save(args.model_path)
    else:
        lstm.load(args.model_path)

    # evaluation and plots
    preds = lstm.predict(test_X[-1].reshape(1, -1, 1))
    preds = processor.postprocess(preds)
    plot_test_datapoint(test_X[-1], test_y[-1], preds[0], args.forecast)

    preds_moving = moving_test_window_preds(lstm, test_X[0, :],
                                            n_future_preds=1000,
                                            step=args.forecast)
    preds_moving = np.array(preds_moving).reshape(-1, 1)
    preds_moving = processor.postprocess(preds_moving)
    plot_moving_window(df['datetime'], raw_series, preds_moving)
def load_tracks_into_dataset(dataset):
    """
    Loads all valid tracks in PATH_TO_WAVS into an existing dataset.

    Args:
        dataset (dict): dataset with 'X', 'y' and 'meta' lists to append to
    """
    files = glob.glob(PATH_TO_WAVS + '*.wav')
    for file in files:
        try:
            genre, beets_id, y = get_metadata(file)
            for i in range(15):
                dataset['y'].append(np.array([y]))
            dp = DataProcessor(filepath=file)
            dp.load_data(n_secs=N_SECS)
            if MEL:
                X = dp.mel_spectrogram
            else:
                X = dp.spectrogram
            if X.shape != (64, 1022):
                continue
            dataset['X'].append(X.astype(float))
            meta = {'beets_id': beets_id, 'genre': genre}
            dataset['meta'].append(meta)
        except Exception:
            # Log the file path: beets_id may not be bound if get_metadata failed
            logger.warning('Error loading file: {}'.format(file), exc_info=True)
def test_make_lines(self):
    test_data_processor = DataProcessor()
    input1 = "string1\nstring2\n"
    input2 = "string1\nstring2\nstring3\n"
    output1 = ["string1", "string2"]
    output2 = ["string1", "string2", "string3"]
    self.assertEqual(test_data_processor.make_lines(input1), output1)
    self.assertEqual(test_data_processor.make_lines(input2), output2)
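# A hypothetical one-line equivalent of make_lines, inferred from the test
# above (str.splitlines already drops the trailing newline):
def make_lines(text):
    return text.splitlines()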
def test_filter_info_right_data_double_header(self):
    data = [
        'RecordId,EmployID,Name,Age,Year,Salary,Type',
        'RecordId,EmployID,Name,Age,Year,Salary,Type'
    ]
    test_processor = DataProcessor()
    with self.assertRaises(ValueError):
        test_processor.filter_info(data)
def train(self, train_file_path, epoch_number, save_models=True):
    """
    Train the attention NLI model stochastically.

    Args:
        train_file_path: jsonl file path to training data
        epoch_number: number of epochs of training
        save_models: save a model checkpoint after every epoch

    Notes:
        trained models are saved in models/
    """
    saver = tf.train.Saver(max_to_keep=500)
    self.dp_train = DataProcessor(input_file_path=train_file_path)
    acc_list = []
    loss_list = []

    # Sanity-check predictions on a few fixed sentence pairs
    sentence_pairs = [
        ("someone to watch Netflix with me",
         "someone to watch TV shows"),
        ("a Penn student to chat for coffee",
         "chat and get to know a penn student"),
        ("a designer to cofound my startup",
         "a software designer interested in entrepreneurship"),
    ]
    for s1, s2 in sentence_pairs:
        embeddings1 = self.dp_train.gloVe_embeddings(s1, self.token_count)
        embeddings2 = self.dp_train.gloVe_embeddings(s2, self.token_count)
        print("Prediction is:",
              self.sess.run(self.h_output,
                            feed_dict={self.a: embeddings1, self.b: embeddings2}))

    self.accuracy_records_by_epoch = []
    for i in range(epoch_number):
        data_num = 0
        for data in self.dp_train.get_single_data():
            data_num += 1
            data_feed_dict = {
                self.a: data["sentence1"],
                self.b: data["sentence2"],
                self.labels: data["gold_label"]
            }
            _, acc, loss = self.sess.run(
                [self.train_op, self.accuracy, self.loss],
                feed_dict=data_feed_dict)
            acc_list.append(acc)
            loss_list.append(loss)
            if data_num % 1000 == 0:
                print("At epoch: {}, {} data processed".format(i, data_num))
        epoch_acc = sum(acc_list) / len(acc_list)
        epoch_loss = sum(loss_list) / len(loss_list)
        self.accuracy_records_by_epoch.append(epoch_acc)
        print("finishing epoch {}, training accuracy: {}, loss: {}".format(
            i, epoch_acc, epoch_loss))
        # Save after every epoch, or only after the final one
        if save_models or i + 1 == epoch_number:
            save_path = saver.save(self.sess, './models/', global_step=i)
            print("Model saved in file: %s" % save_path)
def main(ip, database_ip):
    # set up cockroachdb client
    Base = declarative_base()

    class Metrics(Base):
        __tablename__ = 'metrics'
        id = Column(sqltypes.VARCHAR, primary_key=True)
        ip = Column(sqltypes.VARCHAR)
        time = Column(sqltypes.VARCHAR)
        metric_name = Column(sqltypes.VARCHAR)
        labels = Column(sqltypes.JSON)
        metric_value = Column(sqltypes.FLOAT)

    engine = create_engine(
        'cockroachdb://prom@{}/prometheus'.format(database_ip))
    Session = sessionmaker(bind=engine)
    Base.metadata.create_all(engine)

    # set up processor
    data_processor = DataProcessor(None)

    # third octet of the IP string
    collection = ip.split(".")[2]

    # main loop
    while True:
        try:
            r = requests.get("http://{}:9182/metrics".format(ip))
        except Exception:
            break
        data = []
        for line in r.text.split("\n"):
            data_point = data_processor.process_line(ip, line)
            if data_point is not None:
                data.append(data_point)
        metrics_to_add = []
        for data_point in data:
            computer_ip = data_point["computer_ip"]
            metric_name = data_point["data_type"]
            metric_value = data_point["value"]
            del data_point["computer_ip"]
            del data_point["data_type"]
            del data_point["value"]
            metrics_to_add.append(
                Metrics(id=str(uuid.uuid4()),
                        ip=computer_ip,
                        time=str(datetime.now()),
                        metric_name=metric_name,
                        metric_value=metric_value,
                        labels=json.dumps(data_point)))
        session = Session()
        session.add_all(metrics_to_add)
        session.commit()
        session.close()
        time.sleep(60)
def __init__(self, root_dir, video_title, eval_file, max_frame):
    # eval_file and max_frame were referenced but never defined; they are
    # taken as constructor arguments here
    self.dp = DataProcessor(root_dir, video_title)
    self.viewer = Viewer
    self.max_frame = max_frame
    self.results = {}
    with open(eval_file, "r") as f:
        self.results = json.load(f)
    self.img_path_list = self.get_img_path_list()
def test_make_val(self):
    test_data_processor = DataProcessor()
    val1 = "5"
    val2 = "5.2"
    val3 = "5.0"
    val4 = "char"
    self.assertEqual(test_data_processor.make_val(val1), 5)
    self.assertEqual(test_data_processor.make_val(val2), 5.2)
    self.assertEqual(test_data_processor.make_val(val3), 5)
    self.assertEqual(test_data_processor.make_val(val4), "char")
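# A hypothetical make_val consistent with the test above: parse numerics,
# collapse whole-number floats to int, and pass other strings through.
# A sketch only, not the project's actual implementation.
def make_val(value):
    try:
        num = float(value)
    except ValueError:
        return value        # non-numeric strings are returned unchanged
    if num.is_integer():
        return int(num)     # "5" and "5.0" both become 5
    return num              # "5.2" becomes 5.2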
def __init__(self, params, dataset_name):
    self.params = params
    # self.dataset = dataset
    self.variables_data = dict()
    self.dataset_vars = list()
    # List with names and characteristics of the dataset variables
    self.variables_dataset = list()
    # List with names and characteristics of the template variables
    self.variables_template = list()

    dataset_var_type = self.params.template.variables_dataset['type']
    if dataset_var_type == "multiple":
        self.data_processor = DataProcessor(
            dataset_name, dataset_var_type,
            self.params.template.variables_dataset['aggregation'])
    else:
        self.data_processor = DataProcessor(dataset_name, dataset_var_type)

    self.reverse_data = self.params.template.variables_dataset['reverse'] != "false"

    # Add the dataset dimensions
    self.data_processor.add_dimensions_variables(
        self.params.template.variables_dataset.lat.cdata,
        self.params.template.variables_dataset.lon.cdata,
        self.params.template.variables_dataset.time.cdata,
        self.reverse_data)

    if self.params.template.output["type"] == "images":
        self.template_dimensions = dict()
        self.template_dimensions['max_lat'] = int(
            self.params.template.layers["max_lat"])
        self.template_dimensions['max_lon'] = int(
            self.params.template.layers["max_lon"])
        self.template_dimensions['min_lat'] = int(
            self.params.template.layers["min_lat"])
        self.template_dimensions['min_lon'] = int(
            self.params.template.layers["min_lon"])
        self.interpolation_factor = int(
            self.params.template.layers["interpolation_factor"])

    if self.params.template.output["type"] == "csv":
        self.template_dimensions = dict()
        self.template_dimensions['max_lat'] = self.data_processor.raw_variables["lat"].max()
        self.template_dimensions['max_lon'] = self.data_processor.raw_variables["lon"].max()
        self.template_dimensions['min_lat'] = self.data_processor.raw_variables["lat"].min()
        self.template_dimensions['min_lon'] = self.data_processor.raw_variables["lon"].min()
        print(self.template_dimensions)
        self.interpolation_factor = int(
            self.params.template.points["interpolation_factor"])
def test_make_name(self):
    test_data_processor = DataProcessor()
    name1 = ""
    name2 = "name"
    name3 = "word1 word2"
    name4 = "word1 word2 word3"
    self.assertEqual(test_data_processor.make_name(name1), "")
    self.assertEqual(test_data_processor.make_name(name2), "name")
    self.assertEqual(test_data_processor.make_name(name3), "word1_word2")
    self.assertEqual(test_data_processor.make_name(name4), "word1_word2_word3")
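# A hypothetical make_name matching the expectations above: replace the
# spaces between words with underscores (a sketch, not the real method).
def make_name(name):
    return "_".join(name.split())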
def main():
    songs, notes = DataProcessor.get_parsed_data(config.MIDI_FILES_DIR)
    vocab = len(set(notes))
    network_input, network_output = DataProcessor.prepare_sequences(notes, vocab)
    lstm = LSTM(network_input, vocab, config.WEIGHTS_DUMP, config.PRETRAINED_MODEL)
    # lstm = LSTM(network_input, vocab, config.WEIGHTS_DUMP)
    # lstm.train(network_input, network_output)
    song_writer = SongWriter(lstm, notes, songs)
    song_writer.write_song(config.OUTPUT_DIR + config.SONG_NAME + config.MIDI_EXTENSION)
class FeatureExtractor(object):
    '''
    Controls the processing chain and fetches the values needed
    for the classificator.
    '''

    def __init__(self, dataCollector):
        self.inputQueue = Queue()
        self.outputQueue = Queue()
        self.extractQueue = Queue()
        self.sigUtil = SignalUtil()
        self.eegUtil = EEGUtil()
        self.collector = dataCollector
        self.collectorThread = threading.Thread(target=self.collector.collectData)
        self.processor = DataProcessor(self.inputQueue, self.outputQueue)
        self.processingThread = threading.Thread(target=self.processor.processData)
        self.extract = True

    def start(self):
        '''Sets the data handler and starts collecting.'''
        print("%s: starting feature extractor" % self.__class__.__name__)
        self.collector.setHandler(self.handleDataSet)
        self.collectorThread.start()
        self.processingThread.start()
        while self.extract:
            try:
                procData = self.outputQueue.get(timeout=1)
                self.extractFeatures(procData)
            except Empty:
                pass

    def extractFeatures(self, data):
        features = []
        for _, sigData in data.items():
            theta = self.eegUtil.getThetaChannel(sigData["fft"])
            features.extend(theta)
        self.extractQueue.put(array(features))

    def handleDataSet(self, data):
        '''Adds the given data to the processing queue.'''
        self.inputQueue.put(data)

    def close(self):
        self.processor.close()
        self.processingThread.join()
        self.collector.close()
        self.collectorThread.join()
        print("%s: closing feature extractor" % self.__class__.__name__)
def __init__(self, model_type=None, dummy=False, config_file=None):
    self.dummy = dummy

    # LOGGING FOLDERS
    self.log_dir = "tf_logs/"
    if not os.path.isdir(self.log_dir):
        os.mkdir(self.log_dir)
    self.cp_dir = "tf_models/"
    if not os.path.isdir(self.cp_dir):
        os.mkdir(self.cp_dir)
    self.config_dir = "model_configs/train/"
    if not os.path.isdir(self.config_dir):
        os.mkdir(self.config_dir)

    # IF THE CONFIG FILE WAS GIVEN AS INPUT, USE IT
    if config_file is not None:
        config_path = self.config_dir + config_file
        with open(config_path, "r") as f:
            config = json.load(f)
        self.warm_start = True
    # OTHERWISE FALL BACK TO THE DEFAULT FILE
    else:
        config_file = "main_config.json"
        config_path = f"{self.config_dir.split('/')[0]}/{config_file}"
        with open(config_path, "r") as f:
            config = json.load(f)

        # Check the model type input
        assert type(model_type) == str, f"Invalid model type: {model_type}"
        assert model_type in config, \
            f"Given model type was not found in config file: {model_type}"

        # DUMMY vs REAL
        version_str = "dummy" if self.dummy else "real"
        config["data"] = config["data"][version_str]

        # DROP UNUSED KEYS FROM THE DICT
        for key in list(config.keys()):
            if key not in ["data", model_type]:
                config.pop(key)
        self.warm_start = False

    # DATA PROCESSOR
    self.dp = DataProcessor(config["data"], self.dummy)
    # CREATE THE MODEL
    self.model = self.build_model(config)
    # STORE THE CONFIG DICT FOR LATER
    self.config = config
def setUp(self):
    super(TestDataProcessor, self).setUp()
    spec_file = (self.resource_folder / "valid_data_processing_spec.json").absolute()
    self.data_processor = DataProcessor(spec_file=spec_file)
    self.existing_csv_file = (self.resource_folder / "output.csv").absolute()
    self.existing_hash_csv_file = (self.resource_folder / "output_hash.csv").absolute()
    self.csv_file = (self.temp_folder / "output.csv").absolute()
    self.hash_csv_file = (self.temp_folder / "output_hash.csv").absolute()
def updatedb(args):
    file_name = args.file_name
    dir_name = args.dir_name
    datproc = DataProcessor()
    if file_name is not None and dir_name is not None:
        sys.stderr.write('Error: please assign only one file name or directory name')
    elif file_name is not None:
        datproc.update_database_from_file(file_name)
    elif dir_name is not None:
        datproc.update_database_from_dir(dir_name)
    else:
        sys.stderr.write('Error: no file name or directory name specified')
def test_check_attr(self):
    test_data_processor = DataProcessor()
    test_attributes = ["name", "last name"]
    test_line1 = "change name"
    test_line2 = "change last name"
    self.assertEqual(
        test_data_processor.check_attr("name", test_attributes, test_line1), 1)
    self.assertEqual(
        test_data_processor.check_attr("name", test_attributes, test_line2), 0)
    self.assertEqual(
        test_data_processor.check_attr("last name", test_attributes, test_line2), 1)
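# A hypothetical check_attr consistent with the test above: an attribute
# matches only when it occurs in the line and is not merely a substring of a
# longer attribute that also occurs there ("name" inside "last name" does not
# count). A sketch under those assumptions, not the project's actual code.
def check_attr(attr, attributes, line):
    if attr not in line:
        return 0
    for other in attributes:
        # A longer attribute containing attr shadows it when present in the line
        if other != attr and attr in other and other in line:
            return 0
    return 1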
def main():
    config = json.load(open('config.json', 'r'))
    set_seed(config["seed"])
    if not os.path.exists(config["output_dir"]):
        os.makedirs(config["output_dir"])
    if not os.path.exists(config["save_dir"]):
        os.makedirs(config["save_dir"])

    # model_config = transformers.BertConfig.from_pretrained(config["model_name"])
    # tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
    tokenizer = BertTokenizer.from_pretrained(config["model_name"])
    model = BertForClassification(config["model_name"])
    # model = AutoModelForMultipleChoice.from_pretrained(config["model_name"])
    model.cuda()

    processor = DataProcessor(config["data_dir"])
    train_examples = processor.get_train_examples()
    train_dataset = processor.get_dataset(train_examples, tokenizer, config["max_length"])
    valid_examples = processor.get_dev_examples()
    valid_dataset = processor.get_dataset(valid_examples, tokenizer, config["max_length"])
    test_examples = processor.get_test_examples()
    test_dataset = processor.get_dataset(test_examples, tokenizer, config["max_length"])

    train(config, model, train_dataset, valid_dataset)
    result = evaluate(config, model, test_dataset)
    print(result[:2])
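# A hypothetical config.json for the main() above, covering only the keys the
# function itself reads; all values are illustrative assumptions, and train()
# and evaluate() may read additional keys not shown here.
#
# {
#     "seed": 42,
#     "output_dir": "output/",
#     "save_dir": "checkpoints/",
#     "model_name": "bert-base-uncased",
#     "data_dir": "data/",
#     "max_length": 128
# }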
def loadLevel():
    json_path = "../TheVGLC/Super Mario Bros/Multi-layer/smb-multi-layer.json"
    level_path = "../TheVGLC/Super Mario Bros/Multi-layer/Structural Layer/mario-1-1.txt"
    loader = DataLoader()
    loader.loadJson(json_path)
    loader.loadFile(level_path)
    original_size = loader.loaded_data[0].shape
    processor = DataProcessor(loader.loaded_data[0])
    processor.makeSegments(8, 1)
    return loader, processor, original_size
class DataProcessorTests(unittest.TestCase):

    def setUp(self):
        # Recorded key events; each entry appears to be
        # [key_code, down(0)/up(1) flag, timestamp in ms]
        data = {"id": 1, "data": [
            [84,0,1456010413839],[72,0,1456010413903],[84,1,1456010413938],[73,0,1456010413979],[72,1,1456010414050],[83,0,1456010414079],[73,1,1456010414138],[83,1,1456010414214],[84,0,1456010414444],[89,0,1456010414508],[84,1,1456010414539],[89,1,1456010414587],[80,0,1456010414703],[80,1,1456010414799],[73,0,1456010414963],[78,0,1456010415063],[73,1,1456010415126],[78,1,1456010415182],[71,0,1456010415205],[71,1,1456010415332],[69,0,1456010415801],[69,1,1456010415892],[88,0,1456010416026],[88,1,1456010416125],[69,0,1456010416266],[82,0,1456010416350],[69,1,1456010416433],[82,1,1456010416461],[67,0,1456010416609],[67,1,1456010416684],[73,0,1456010416689],[67,0,1456010416777],[73,1,1456010416784],[67,1,1456010416900],[69,0,1456010416936],[69,1,1456010417027],[83,0,1456010417490],[83,1,1456010417593],[83,0,1456010419669],[83,1,1456010419756],[69,0,1456010419834],[69,1,1456010419941],[73,0,1456010420333],[83,0,1456010420406],[73,1,1456010420461],[83,1,1456010420497],[65,0,1456010420631],[65,1,1456010420759],[83,0,1456010420838],[83,1,1456010420950],[84,0,1456010421070],[84,1,1456010421133],[82,0,1456010421220],[82,1,1456010421324],[65,0,1456010421401],[78,0,1456010421510],[65,1,1456010421545],[71,0,1456010421602],[78,1,1456010421609],[69,0,1456010421702],[71,1,1456010421741],[69,1,1456010421853],[74,0,1456010422162],[74,1,1456010422261],[85,0,1456010422311],[85,1,1456010422415],[77,0,1456010422457],[66,0,1456010422545],[77,1,1456010422552],[66,1,1456010422652],[76,0,1456010422656],[76,1,1456010422760],[69,0,1456010422779],[69,1,1456010422912],[79,0,1456010423361],[79,1,1456010423460],[70,0,1456010423569],[70,1,1456010423649],[65,0,1456010423819],[65,1,1456010423954],[75,0,1456010424045],[75,1,1456010424160],[87,0,1456010424198],[87,1,1456010424310],[65,0,1456010424388],[65,1,1456010424532],[82,0,1456010424614],[82,1,1456010424722],[68,0,1456010424819],[68,1,1456010424926],[80,0,1456010425556],[80,1,1456010425667],[72,0,1456010425788],[82,0,1456010425856],[72,1,1456010425883],[82,1,1456010425943],[65,0,1456010426024],[83,0,1456010426120],[65,1,1456010426163],[83,1,1456010426235],[69,0,1456010426311],[69,1,1456010426406],[83,0,1456010426471],[188,0,1456010426567],[83,1,1456010426634],[188,1,1456010426706],[82,0,1456010427221],[82,1,1456010427312],[69,0,1456010427391],[80,0,1456010427439],[69,1,1456010427482],[80,1,1456010427546],[82,0,1456010427550],[82,1,1456010427630],[69,0,1456010427694],[69,1,1456010427797],[83,0,1456010427919],[83,1,1456010427994],[69,0,1456010428081],[69,1,1456010428177],[78,0,1456010428186],[78,1,1456010428281],[84,0,1456010428294],[84,1,1456010428397],[73,0,1456010428401],[78,0,1456010428490],[73,1,1456010428557],[71,0,1456010428578],[78,1,1456010428597],[71,1,1456010428717],[84,0,1456010428834],[72,0,1456010428901],[84,1,1456010428929],[72,1,1456010429025],[69,0,1456010429041],[69,1,1456010429165],[81,0,1456010429392],[85,0,1456010429496],[81,1,1456010429535],[73,0,1456010429544],[85,1,1456010429611],[73,1,1456010429663],[84,0,1456010429734],[84,1,1456010429817],[69,0,1456010429910],[69,1,1456010430005],[83,0,1456010430121],[83,1,1456010430176],[83,0,1456010430259],[83,1,1456010430354],[69,0,1456010430422],[78,0,1456010430486],[69,1,1456010430537],[67,0,1456010430586],[78,1,1456010430593],[69,0,1456010430682],[67,1,1456010430733],[69,1,1456010430829],[79,0,1456010430964],[70,0,1456010431079],[79,1,1456010431082],[70,1,1456010431187],[69,0,1456010431377],[69,1,1456010431465],[88,0,1456010431617],
            [88,1,1456010431708],[81,0,1456010431874],[85,0,1456010431942],[81,1,1456010431997],[73,0,1456010432006],[85,1,1456010432057],[73,1,1456010432089],[83,0,1456010432114],[73,0,1456010432194],[83,1,1456010432229],[73,1,1456010432289],[84,0,1456010432349],[84,1,1456010432424],[69,0,1456010432512],[69,1,1456010432640],[68,0,1456010432756],[73,0,1456010432848],[68,1,1456010432854],[73,1,1456010432959],[83,0,1456010432986],[83,1,1456010433093],[71,0,1456010434948],[82,0,1456010435056],[71,1,1456010435115],[82,1,1456010435191],[65,0,1456010435323],[65,1,1456010435474],[80,0,1456010435483],[80,1,1456010435593],[72,0,1456010435680],[83,0,1456010435760],[72,1,1456010435799],[83,1,1456010435919],[68,0,1456010436392],[73,0,1456010436480],[68,1,1456010436503],[73,1,1456010436587],[67,0,1456010436596],[67,1,1456010436707],[84,0,1456010436809],[84,1,1456010436877],[65,0,1456010436998],[84,0,1456010437158],[65,1,1456010437169],[69,0,1456010437262],[84,1,1456010437321],[69,1,1456010437405],[83,0,1456010437502],[83,1,1456010437653],[68,0,1456010437978],[68,1,1456010438114],[66,0,1456010438987],[89,0,1456010439107],[66,1,1456010439118],[89,1,1456010439238],[65,0,1456010439288],[65,1,1456010439471],[70,0,1456010440002],[79,0,1456010440070],[70,1,1456010440101],[79,1,1456010440193],[82,0,1456010440213],[82,1,1456010440309],[69,0,1456010440368],[73,0,1456010440428],[69,1,1456010440503],[78,0,1456010440528],[73,1,1456010440587],[78,1,1456010440659],[77,0,1456010441514],[73,0,1456010441599],[68,0,1456010441667],[77,1,1456010441678],[73,1,1456010441718],[68,1,1456010441782],[71,0,1456010441900],[71,1,1456010441996],[69,0,1456010442030],[84,0,1456010442115],[69,1,1456010442182],[84,1,1456010442222],[190,0,1456010442508],[190,1,1456010442631]]}
        self.dp = DataProcessor(data)

    def testNgrams(self):
        self.assertListEqual([(1, 2), (2, 3), (3, 4)],
                             self.dp.ngrams([1, 2, 3, 4], 2))
        self.assertListEqual([(1, 2, 3), (2, 3, 4), (3, 4, 5)],
                             self.dp.ngrams([1, 2, 3, 4, 5], 3))

    def testProcess(self):
        self.dp.preprocess()
        f = self.dp.process()
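# A hypothetical ngrams helper matching testNgrams above: slide a window of
# size n over the sequence and emit tuples (a sketch, not the class's code).
def ngrams(sequence, n):
    return list(zip(*(sequence[i:] for i in range(n))))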
def TestExtNegativeSampling():
    dataFile = "./dataset/samples/qa-dump-1460090355004_new.json"
    dataProvider = DataProcessor(dataFile)
    nNegSample = 100
    dataProvider.NegSampleExt(nNegSample)
    for title in dataProvider.data.keys():
        article = dataProvider.data[title]
        for i in range(len(article["answers"])):
            for negSample in article["negExtSamples"][i]:
                answer_words = " ".join(SentenceToWord((article["answers"][i],))[0])
                print(answer_words, negSample)
                assert answer_words in negSample
    print("Extension negative sampling test passed!")
def __process_one_product__(self):
    df = self.products.head(1)
    measurement = SentinelMeasurement(
        api=self.__api__,
        geojson_path=self.__geojson_path__,
        dataframe=df,
        autofetch=True)
    dp = DataProcessor(
        measurement,
        lambda tiff, result, profile: self.__save_result__(measurement, tiff, result, profile))
    dp.process_data(df)
    self.products = self.products.iloc[1:]
    products_left = len(self.products.index)
    print("{} measurements are left".format(products_left))
def aquire_and_append_metrics(inlet, fs, data_processor: DataProcessor):
    """Get metrics from the inlet and append them to the data processor.

    Parameters:
    -----------
    inlet: LSL stream inlet to read EEG data from
    fs: sampling frequency of the stream
    data_processor: DataProcessor that accumulates the metrics

    Returns:
    --------
    None: updates data_processor in place
    """
    # Obtain EEG data from the LSL stream
    eeg_data, timestamp = acquire_eeg_data(inlet, fs)

    # Feed the new data generated in this epoch and compute metrics
    data_processor.feed_new_data(eeg_data=eeg_data)
    data_processor.append_metrics()
def test(self, load_model=False):
    """
    Test the model on the test set, optionally restoring a saved checkpoint.

    :param load_model: load the model from config.load_model_path first
    :return:
    """
    if load_model:
        print('Start loading model from "%s"' % self.config.load_model_path)
        self.model.load_state_dict(torch.load(self.config.load_model_path))
    test_loader = DataProcessor(self.config.test_file, self.config.batch_size).load()
    with torch.no_grad():
        correct = 0
        total = 0
        for features, labels in test_loader:
            features = features.to(device)
            _, labels = torch.max(labels, 1)
            labels = labels.to(device)
            outputs = self.model(features)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print('Test Accuracy of the model: {} %'.format(100 * correct / total))
def train(self):
    """
    Train the model and save a checkpoint when done.
    """
    print('Start training model.')
    train_loader = DataProcessor(self.config.training_file, self.config.batch_size).load()
    total_step = len(train_loader)
    for epoch in range(self.config.training_epoch):
        for i, (features, labels) in enumerate(train_loader):
            features = features.to(device)
            # torch.max returns (max values, indices of the max values)
            _, labels = torch.max(labels, 1)
            labels = labels.to(device)

            # Forward pass
            outputs = self.model(features)
            loss = self.criterion(outputs, labels)

            # Backward and optimize
            self.optimizer.zero_grad()  # clear the gradient buffers
            loss.backward()             # backpropagate to compute gradients
            self.optimizer.step()       # update model parameters with the gradients

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, self.config.training_epoch,
                              i + 1, total_step, loss.item()))

    # Save the model checkpoint
    print('Start saving model to "%s".' % self.config.save_model_path)
    torch.save(self.model.state_dict(), self.config.save_model_path)
def __init__(self, main_window, c_logger=None, data_processor=None):
    """
    Init method of the 'MainWindow' class.

    :param main_window: Instance of the main Tk window.
    :param c_logger: Logger instance (ColoredLogger type is recommended).
                     Default is MAIN_LOGGER (global variable).
    :param data_processor: Instance of the DataProcessor module.
    """
    self.c_logger = c_logger if c_logger else self.__set_up_default_logger()
    self.main_window = main_window
    self.c_logger.info("Get main window: {}".format(main_window))

    self.c_logger.info("Creating DataProcessor instance.")
    self.data_processor = (data_processor
                           if data_processor
                           else DataProcessor(c_logger=self.c_logger))
    self.c_logger.info("DataProcessor instance successfully created.")

    self.__create_main_gui_section()
    self.__create_personal_gui_section()
    self.__create_horizontal_separator_lines()
    self.__create_vertical_separator_lines()
    self.__set_resizable(row=9, col=3)
class TestProcessingChain(unittest.TestCase):
    # TODO: test queue and threading

    def setUp(self):
        inputQueue = Queue()
        outputQueue = Queue()
        self.processor = DataProcessor(inputQueue, outputQueue)

    def test_process(self):
        self.processor.process(TEST_DATA)

    def test_splitData(self):
        eegData, gyroData = self.processor.splitData(TEST_DATA)
        intersect = set(eegData) & set(gyroData)
        self.assertTrue(len(intersect) == 0)
        self.assertTrue("F3" in eegData)
        self.assertTrue("X" in gyroData)
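# A hypothetical TEST_DATA fixture consistent with test_splitData above:
# channels keyed by name, with EEG channels such as "F3" and gyro axes such
# as "X" (the structure and values here are illustrative assumptions).
TEST_DATA = {
    "F3": [4259.48, 4261.02, 4263.58],  # assumed EEG channel samples
    "X": [1713, 1714, 1712],            # assumed gyro axis samples
}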
def extract(args):
    output_file_name = args.output_file_name
    extraction_method = args.extraction_method
    label_type = args.label_type
    if output_file_name is None:
        sys.stderr.write('Error: no file name to output is specified')
        sys.exit()
    if extraction_method is None:
        extraction_method = '3-gram'  # default: 3-gram
    if label_type is None:
        label_type = 'compiler'  # default: compiler
    datproc = DataProcessor()
    datproc.save_all_data_in_svmlight_format(output_file_name, extraction_method, label_type)
class GUI:
    GLADE_FILE = "GUI.glade"
    EXPORT_RESPONSE_OK = 1

    def __init__(self):
        self.builder = Gtk.Builder()
        self.builder.add_from_file(GUI.GLADE_FILE)
        # order is important!
        self._gui_elements_init_()
        self._graph_init_()
        self._data_processor_init_()
        self._provider_init_()
        self.is_active = False
        self.builder.connect_signals(self)
        self.builder.get_object("main_window").show_all()
        self.stop()

    def _gui_elements_init_(self):
        # attach elements to the paned container
        graph_window = self.builder.get_object("graph_window")
        control_panel = self.builder.get_object("control_panel")
        working_area_paned = self.builder.get_object("working_area_paned")
        # graph to the left: resizable; control panel to the right
        working_area_paned.pack1(graph_window, resize=True, shrink=True)
        working_area_paned.pack2(control_panel, resize=False, shrink=True)
        # start button
        self.gui_start_btn = self.builder.get_object("start_btn")
        self.gui_start_label = self.builder.get_object("start_lbl")
        self.gui_stop_label = self.builder.get_object("stop_lbl")
        # provider settings (the box is needed later by _provider_init_)
        self.gui_provider_settings_area = self.builder.get_object("provider_settings_alignment")
        self.gui_provider_settings_box = self.builder.get_object("provider_settings_box")
        # filter settings
        self.gui_filter_settings_box = self.builder.get_object("filter_settings_box")
        # export dialog
        self.gui_export_btn = self.builder.get_object("export_btn")
        self.gui_export_dialog = None
        # error message dialog
        self.gui_error_message_dialog = None

    def _data_processor_init_(self):
        filters = [data_filter.Invertor(), data_filter.SelfAdjustableNotchFilter()]
        self.data_processor = DataProcessor(self.plotter.plot_valid,
                                            self.plotter.plot_error,
                                            filters)
        for filter_ in filters:
            name = filter_.get_name()
            name_repr = GTK_Wrapper.get_wrapper(name).get_gui_object()
            self.gui_filter_settings_box.pack_start(name_repr, True, True, 0)
            filter_settings_mgr = filter_.settings_manager()
            self._add_all_params(filter_settings_mgr, self.gui_filter_settings_box)

    def _provider_init_(self):
        self.data_provider = RandomWalkDataProvider(onData=self.data_processor.new_data,
                                                    onError=self.error_stop)
        # self.data_provider = SerialPortDataProvider(self.data_processor.new_data, self.error_stop)
        data_provider_settings_mgr = self.data_provider.settings_manager()
        self._add_all_params(data_provider_settings_mgr, self.gui_provider_settings_box)

    def _graph_init_(self):
        # Create graph
        # self.graph = MatplotlibGraph(onClose=self.stop)
        self.graph = GTK_Graph(self.builder.get_object("graph_area"),
                               settings.GRAPH_COLORS,
                               settings.DATA_MIN_VALUE,
                               settings.DATA_MAX_VALUE)
        self.plotter = Plotter(self.graph)

    def _add_all_params(self, obj_settings_mgr, gui_setting_box):
        for param in obj_settings_mgr.all_params():
            wrapper = GTK_Wrapper.get_wrapper(param)
            gui_obj = wrapper.get_gui_object()
            gui_setting_box.pack_start(gui_obj, True, True, 0)

    def close(self):
        self.stop()
        self.graph.close()

    def start(self):
        if self.is_active:
            self.stop()
        logger.to_log("start")
        self.is_active = True
        # handle the GUI first!
        # disable port settings
        self.gui_provider_settings_area.set_sensitive(False)
        # rename start button
        self.gui_start_btn.set_label(self.gui_stop_label.get_text())
        # disable export button
        self.gui_export_btn.set_sensitive(False)
        self.data_processor.enable()
        # start listening
        self.data_provider.activate()

    def error_stop(self, text):
        self.error_message(text)
        self.stop()

    def error_message(self, text):
        logger.to_log(text)
        if self.gui_error_message_dialog is None:
            self.gui_error_message_dialog = self.builder.get_object("error_message_dialog")
        self.gui_error_message_dialog.set_property("secondary-text", text)
        self.gui_error_message_dialog.run()
        self.gui_error_message_dialog.hide()

    def stop(self):
        logger.to_log("stop")
        self.data_provider.deactivate()
        self.data_processor.disable()
        self.is_active = False
        # enable settings
        self.gui_provider_settings_area.set_sensitive(True)
        # rename start button
        self.gui_start_btn.set_label(self.gui_start_label.get_text())
        # enable export button
        self.gui_export_btn.set_sensitive(True)

    def on_main_window_delete_event(self, *args):
        self.close()
        Gtk.main_quit()

    def on_start_clicked(self, *args):
        if self.is_active:
            self.stop()
        else:
            self.start()

    def on_export_clicked(self, *args):
        if self.gui_export_dialog is None:
            self.gui_export_dialog = self.builder.get_object("export_filechooser_dialog")
        fproc = FileProcessor()
        self.gui_export_dialog.set_current_name(fproc.get_name())
        response = self.gui_export_dialog.run()
        if response == GUI.EXPORT_RESPONSE_OK:
            fproc.set_name(self.gui_export_dialog.get_filename())
            fproc.do_export(self.data_processor, onError=self.error_message)
        self.gui_export_dialog.hide()
import numpy as np

from data_processor import DataProcessor
from neural_network import NeuralNetwork

if __name__ == '__main__':
    data_processor = DataProcessor()
    x_train, y_train = data_processor.get_train_set()
    x_test, y_test = data_processor.get_test_set()

    input_nodes = 2
    hidden_nodes = 3
    output_nodes = 1
    network = NeuralNetwork(input_nodes=input_nodes,
                            hidden_nodes=hidden_nodes,
                            output_nodes=output_nodes,
                            lr=0.01)
    network.train(x_train, y_train)
    score = network.evaluate(x_test, y_test)
    print(score)

    x = np.array([[1, 1], [10, 10], [100, 100], [2000, 1000]], dtype=float)
    y = network.predict(x)
    print(y)