def __init__(self, filename: str):
    self.basename = os.path.splitext(filename)[0]

    # read the raw .sc file; the with-block closes the handle automatically
    with open(f'sc/{filename}', 'rb') as fh:
        buffer = fh.read()

    decompressor = Decompressor()
    buffer = decompressor.decompress(buffer)

    Reader.__init__(self, buffer, 'little')

    self.is_texture = self.basename.endswith('_tex')
    if self.is_texture:
        if not os.path.exists('png'):
            os.mkdir('png')

    self.shape_count: int = 0
    self.clips_count: int = 0
    self.textures_count: int = 0
    self.text_fields_count: int = 0
    self.matrix_count: int = 0
    self.color_transformations_count: int = 0

    self.shapes: list = []
    self.clips: list = []
    self.textures: list = []
    self.text_fields: list = []
    self.matrix: list = []
    self.color_transformations: list = []

    self.exports: list = []
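# A minimal usage sketch of the constructor above. `ScFile` is a hypothetical
# name for the enclosing class and 'ui_tex.sc' is a placeholder file under sc/;
# neither name comes from the snippet itself.
if __name__ == '__main__':
    sc = ScFile('ui_tex.sc')           # decompresses sc/ui_tex.sc and wraps it in a little-endian Reader
    print(sc.basename, sc.is_texture)  # '_tex' files get their atlases exported to png/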
def main():
    lib = Library()

    user1 = Reader("Djinn", "Co", 25)
    user2 = Reader("Artem", "KFC", 48)
    user3 = Reader("Suga", "Rogue", 35)

    book1 = Book("Ubik", "Philip K. Dick", 1969)
    book2 = Book("The Last Wish", "Andrzej Sapkowski", 2007)
    book3 = Book("It", "Stephen King", 1986)

    print(' ')
    lib.add_book_to_lib(book1)
    lib.add_book_to_lib(book2)
    lib.add_book_to_lib(book3)
    print(' ')
    lib.add_user_to_readerslist(user1)
    lib.add_user_to_readerslist(user2)
    lib.add_user_to_readerslist(user3)
    print(' ')
    time.sleep(3)
    lib.give_book_to_user(user1, book1)
    lib.give_book_to_user(user3, book1)
    time.sleep(3)
    print(' ')
    lib.show_books(available=True)
    print(' ')
    lib.show_books(available=False)
    print(' ')
    lib.sort_books('year')
class ReaderBase(TestCase):
    result = None
    reader = None

    read_fixture = [
        'type, cash, price, bonus_ratio',
        '"white",15, 2, 3',
        '"milk", 12, 2, 5',
        '"dark", 13, 4, 1',
        '"white", 6, 2, 2',
        '"bozo", 3, 2, 1',
    ]

    expected_result = [
        ['"white"', '15', ' 2', ' 3'],
        ['"milk"', ' 12', ' 2', ' 5'],
        ['"dark"', ' 13', ' 4', ' 1'],
        ['"white"', ' 6', ' 2', ' 2'],
        ['"bozo"', ' 3', ' 2', ' 1'],
    ]

    def setUp(self):
        self.reader = Reader("")

    @patch("utils.reader.Reader.read")
    def test_read_and_split(self, read):
        read.return_value = self.read_fixture
        self.assertEqual(self.reader.result(), self.expected_result)

    @patch("utils.reader.Reader.read")
    def test_split(self, read):
        read.return_value = ['Skip heading..\n', 'valid,data\n']
        expected = [['valid', 'data']]
        self.assertEqual(self.reader.split(), expected)
def testMakeobjectsfromxml(self):
    reader = Reader()
    soup = reader.readfile()
    threads = reader.makeobjectsfromxml(soup)
    for thread in threads:
        for document in thread._documents:
            print(document.text)
def calculate(
    self,
    source_code: str,
    priority_code: str,
    buy_price_code: str,
    sell_price_code: str,
    market: Market,
    start_date: date,
    end_date: date,
):
    logging.debug(
        f"{TAG} Start calculate start_date = {start_date}, end_date = {end_date}"
    )
    compile_result = self.compiler.compile(
        source_code, priority_code, buy_price_code, sell_price_code)
    executor = Executor("")
    reader = Reader(executor)

    field_list = list(compile_result.fields)
    required_field_list = [Field.open, Field.close, Field.is_active]
    for field in required_field_list:
        if field not in field_list:
            field_list.append(field)

    rows = reader.get_simulating_data(
        Universe.total, field_list, start_date, end_date,
        [Field.open, Field.close, Field.ticker_id, Field.low, Field.high])
    if len(rows) == 0:
        return None

    now = time.time()
    total_df = pd.DataFrame(rows)
    total_job_count = len(compile_result.item_list)
    completed_job_count = 0
    for item in compile_result.item_list:
        # TODO: keep checking that is_rank is set correctly, and verify it
        # Rank functions operate per trading date
        if item.is_rank:
            y = total_df.groupby("date", as_index=False).apply(
                lambda df: self._calculate(item.code, df))
        else:
            # All other functions operate per ticker
            y = total_df.groupby("ticker_id", as_index=False).apply(
                lambda df: self._calculate(item.code, df))
        different_columns = total_df.columns.symmetric_difference(y.columns)
        for column in different_columns:
            total_df.insert(0, column, y[column])
        completed_job_count += 1
        self.progress = Decimal(completed_job_count / total_job_count)

    logging.debug("{} execute time : {:0.3f}s".format(TAG, time.time() - now))
    return total_df
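# A small, self-contained illustration (not from the project) of the two grouping
# modes used above: rank-style items are evaluated cross-sectionally per date,
# everything else per ticker. `score` is a stand-in for self._calculate.
import pandas as pd

total_df_demo = pd.DataFrame({
    "date": ["2021-01-04", "2021-01-04", "2021-01-05", "2021-01-05"],
    "ticker_id": ["A", "B", "A", "B"],
    "close": [10.0, 20.0, 11.0, 19.0],
})

def score(df):
    return df["close"].rank()  # e.g. rank closes within the group

per_date = total_df_demo.groupby("date", as_index=False).apply(score)         # cross-sectional
per_ticker = total_df_demo.groupby("ticker_id", as_index=False).apply(score)  # time series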
def test_final(self):
    reader = Reader()
    soup = reader.readfile()
    threads = reader.makeobjectsfromxml(soup)
    tokenizer = Tokenizer(threads)
    collection_tokenized = tokenizer.tokenize()
    coll_model = CollectionModel(collection_tokenized)
    doc_model = DocumentModel(collection_tokenized)
    ret_model = RetrievalModel(collection_tokenized, doc_model, coll_model)
    ret_model.calculate_relevance()
def testtokenizerfromfile(self):
    reader = Reader()
    soup = reader.readfile()
    threads = reader.makeobjectsfromxml(soup)
    tokenizer = Tokenizer(threads)
    threads_tokenized = tokenizer.tokenize()
    for thread in threads_tokenized:
        print(thread._query._body)
        for document in thread._documents:
            print(document._text)
def testdocumentmodel(self):
    reader = Reader()
    soup = reader.readfile()
    threads = reader.makeobjectsfromxml(soup)
    tokenizer = Tokenizer(threads)
    threads_tokenized = tokenizer.tokenize()
    collection_model = CollectionModel(threads_tokenized)
    freq_collection = collection_model.calculate_frequency()
    print(freq_collection)
    document_model = DocumentModel(threads_tokenized)
    freq_document = document_model.calculate_frequency()
    print(freq_document)
def train():
    print("training...")
    reader = Reader()

    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        with tf.device('/cpu:0'):
            images, boxes = reader.distorted_inputs(FLAGS.train_dir,
                                                    FLAGS.batch_size)

        logits = None    # TODO
        loss = None      # TODO
        train_op = None  # TODO

        class _LoggerHook(tf.train.SessionRunHook):
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    # loss_value = run_values.results
                    loss_value = .1
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = ('%s: step: %d, loss = %.2f '
                                  '(%.1f examples/sec, %.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_state_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def run_pipeline(self):
    customer_order = CustomerOrder()
    read = Reader(self._file)
    line_order = ValidateInput()
    self._line_item = []
    for order in read.result():
        line_order.set(order)
        if line_order.valid_types:
            customer_order.order_type = line_order.value[0]
            customer_order.cash = line_order.value[1]
            customer_order.price = line_order.value[2]
            customer_order.bonus_ratio = line_order.value[3]
            self._line_item.append(customer_order.final_order)
def _get_preprocessed_df(
    self, preprocessed_filename: str, df_articles: DataFrame, document_type: DocumentType, overwrite: bool
) -> DataFrame:
    """
    Helper function to get the preprocessed pandas dataframe. If the preprocessing was already done once
    (the JSON file exists), the tagging is not repeated; instead, the JSON file with the preprocessed data
    is read into a pandas dataframe. If preprocessing is performed, the result is stored in a JSON file.
    A different preprocessing is applied depending on the document type.
    :param preprocessed_filename: Name of the JSON file to store/read the results of preprocessing.
    :param df_articles: Dataframe with the text to preprocess, if the data still needs to be preprocessed.
    :param document_type: Type of the document that is going to be preprocessed.
    :param overwrite: Determines if the previous data is allowed to be overwritten.
    :return: df_preprocessed: Pandas dataframe of the preprocessed input.
    """
    json_path = "src/output/" + preprocessed_filename + ".json"

    if Path(json_path).exists() and not overwrite:
        return Reader.read_json_to_df_default(json_path)

    if document_type.value == DocumentType.ARTICLE.value:
        df_preprocessed = self._apply_preprocessing(df_articles, document_type, FilterType.PARTIES)
    elif document_type.value == DocumentType.PARAGRAPH.value:
        df_preprocessed = self._preprocess_paragraphs(df_articles)
    else:
        df_articles = df_articles[["title", "media"]].rename(columns={"title": "text"})
        df_preprocessed = self._apply_preprocessing(df_articles, document_type, FilterType.NONE)

    return df_preprocessed
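# A generic sketch of the cache-or-compute flow described in the docstring above.
# `compute` and the pandas JSON helpers are placeholders here; the project itself
# goes through its own Reader helper for the JSON round trip.
from pathlib import Path
import pandas as pd

def cached_preprocessing(json_path: str, compute, overwrite: bool = False) -> pd.DataFrame:
    path = Path(json_path)
    if path.exists() and not overwrite:
        return pd.read_json(path)  # reuse the stored preprocessing
    df = compute()                 # run the (expensive) preprocessing
    df.to_json(path)               # persist the result for the next run
    return df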
def ingest_data(self, filepath, dataset):
    """
    Load data from a file into a dataframe and store it in the DB.

    Parameters
    ----------
    filepath : String
        File path of the .csv file for the dataset
    dataset : DataSet
        The DataSet object that holds the Session ID for HoloClean

    Returns
    -------
    No return
    """
    # Spawn new reader and load data into dataframe
    fileReader = Reader(self.holoEnv.spark_session)
    df = fileReader.read(filepath)

    # Store dataframe to DB table
    schema = df.schema.names
    name_table = self._add_info_to_meta('Init', schema, dataset)
    self._dataframe_to_table(name_table, df)
    table_attribute_string = self.get_schema(dataset, "Init")

    count = 0
    map_schema = []
    attributes = table_attribute_string.split(',')
    for attribute in attributes:
        if attribute != "index":
            count = count + 1
            map_schema.append([count, attribute])

    dataframe_map_schema = self.holoEnv.spark_session.createDataFrame(
        map_schema, StructType([
            StructField("index", IntegerType(), False),
            StructField("attribute", StringType(), True)
        ]))
    self.add_db_table('Map_schema', dataframe_map_schema, dataset)

    for table_tuple in map_schema:
        self.attribute_map[table_tuple[1]] = table_tuple[0]

    return
def ingest_data(self, filepath, dataset):
    """
    Load data from a file into a dataframe and store it in the DB.

    filepath : String
        File path of the .csv file for the dataset
    dataset : DataSet
        The DataSet object that holds the Session ID for HoloClean
    """
    # Spawn new reader and load data into dataframe
    filereader = Reader(self.holo_env.spark_session)

    # Read with an index column
    df = filereader.read(filepath, 1)

    # Store dataframe to DB table
    schema = df.schema.names
    name_table = dataset.table_specific_name('Init')
    self.dataframe_to_table(name_table, df)
    dataset.attributes['Init'] = schema

    count = 0
    map_schema = []
    attribute_map = {}
    for attribute in schema:
        if attribute != GlobalVariables.index_name:
            count = count + 1
            map_schema.append([count, attribute])
            attribute_map[attribute] = count

    dataframe_map_schema = self.holo_env.spark_session.createDataFrame(
        map_schema, dataset.attributes['Map_schema'])
    self.add_db_table('Map_schema', dataframe_map_schema, dataset)

    for table_tuple in map_schema:
        self.attribute_map[table_tuple[1]] = table_tuple[0]

    return df, attribute_map
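# A tiny illustration (not from HoloClean itself) of what the mapping loop above
# produces for a hypothetical CSV with columns city, state, zip plus the index column.
schema = ["index", "city", "state", "zip"]
count = 0
map_schema = []
attribute_map = {}
for attribute in schema:
    if attribute != "index":
        count = count + 1
        map_schema.append([count, attribute])
        attribute_map[attribute] = count

print(map_schema)     # [[1, 'city'], [2, 'state'], [3, 'zip']]
print(attribute_map)  # {'city': 1, 'state': 2, 'zip': 3}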
def main():
    args = parse_args()
    create_dirs(args.model_name, [args.checkpoint_dir, args.log_dir])

    sess = tf.Session()
    logger = Logger(args, sess)
    model = Model(args, logger)
    reader = Reader(args, sess, logger)

    if args.action == 'train':
        trainer = Trainer(sess, model, reader, args, logger)
        trainer.train()
    else:
        predictor = Estimator(sess, model, reader, args, logger)
        predictor.predict()
def render_level(level, canvas):
    data = Reader.read_level(level)
    brick_dict = {}
    y1, y2 = 30, 60
    for idx, rw in enumerate(data):
        x1, x2 = 0, int(canvas.SCREENWIDTH * 0.8 / 10)
        for brk in rw[1:]:
            if brk:
                brick = Brick(x1, y1, x2, y2, color=brk, resistance=rw[0])
                brick.render_brick(canvas)
                brick_dict[id(brick)] = brick
            x1 += int(canvas.SCREENWIDTH * 0.8 / 10)
            x2 += int(canvas.SCREENWIDTH * 0.8 / 10)
        y1 += 30
        y2 += 30
    return brick_dict
loss_function = configs.loss
hidden = configs.hidden
reg = configs.reg
n_neg_samples = configs.n_neg_samples
dropout = configs.dropout

if configs.debug:
    print(
        "loaded parameters dataset_name: %s, bern: %s, epochs: %d, "
        "batch_size: %d, learning_rate: %f, dim: %d, margin: %f, "
        "lr_decay: %f, loss_function: %s, hidden: %s"
        % (dataset_name, bern, epochs, batch_size, learning_rate, dim,
           margin, lr_decay, loss_function, hidden))

device = torch.device("cuda")
os.environ["CUDA_VISIBLE_DEVICES"] = gpu

reader = Reader(configs)
n_train = reader.n_train
n_ent = reader.n_ent
n_rel = reader.n_rel
stat = reader.stat
corrupter = Corrupter(configs, n_ent, stat)


def load_model(model_name):
    loaded_dict = torch.load(
        os.path.join(configs.save_path, model_name + ".mdl"))
    if model_name == "TransE":
        model = TransE(loaded_dict["configs"], n_ent, n_rel)
    else:
        model = ComplEx(loaded_dict["configs"], n_ent, n_rel)
def get_response(self):
    header = self.s.recv(7)
    packet_length = int.from_bytes(header[2:5], 'big')
    data = self.recvall(self.s, packet_length)

    r = Reader(data)
    code = r.readUInt32()
    if code == 7 or code == 8:
        self.fingerprint = json.loads(r.readFinger())  # fingerprint
        r.readInt32()
        r.readShort()
        self.assets_url = r.readString()  # assets url
        r.skip(23)
        r.readString()
        r.skip(2)
        r.readString()

        d = Downloader(self.fingerprint, self.assets_url)
        d.download()
    else:
        _(f"Received code {code} - returning!")
        return
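# `recvall` is defined elsewhere in the class; a minimal sketch of such a helper,
# assuming a blocking socket that delivers exactly `length` payload bytes after
# the 7-byte header.
def recvall(sock, length):
    chunks = []
    remaining = length
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:  # connection closed before the full payload arrived
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)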
def create_data2(self, year, tourist_file=TOURIST_FILE,
                 weather_file=WEATHER_FILE, weather_file2=WEATHER_FILE2):
    """
    Builds a dataset for the given year from the data in tourist_file and weather_file.
    :param year: year of the data (numeric)
    :param tourist_file: visitor-count data file, default path TOURIST_FILE
    :param weather_file: weather data file, default path WEATHER_FILE
    :param weather_file2: detailed weather data file, default path WEATHER_FILE2
    :return:
    """
    tourist_file = tourist_file.replace('YYYY', str(year))
    weather_file = weather_file.replace('YYYY', str(year))
    weather_out = self.WEATHER_OUT.replace('YYYY', str(year))
    holiday_out = self.HOLIDAY_OUT.replace('YYYY', str(year))
    file_out = self.FILE_OUT.replace('YYYY', str(year))

    if not os.path.isfile(weather_out):
        Reader().get_weather(weather_file, weather_out)
    weather_dict = Reader().read_weather(weather_out)
    weather_dict2 = Reader().read_weather2(weather_file2)

    # Fetch the year's holiday data from the API and store it in holiday_out,
    # so repeated HTTP requests are avoided
    if not os.path.isfile(holiday_out):
        IfHoliday().get_year_holiday(year, holiday_out)
    holiday_dict = Reader().read_holiday(holiday_out)

    # Using a dict may break the date order, which can hurt feature engineering
    tourist_dict = Reader().read_tourist(tourist_file)

    with codecs.open(file_out, 'a+', 'utf-8') as fout:
        fout.write(
            "scenic_area,date,tourist,holiday,weather,min_temperature,max_temperature,mean_temperature,"
            + "humidity,wind_speed,precipitation,cloudage" + "\n")
        for date, tourist in tourist_dict.items():
            name = "上饶灵山景区"  # scenic area name (Shangrao Lingshan Scenic Area)
            weather = weather_dict[date][0]
            # max_temperature = weather_dict[date][1]
            # min_temperature = weather_dict[date][2]
            if date in weather_dict2.keys():
                weather_info = str(weather_dict2[date]).strip('[]')
            else:
                weather_info = str("None," * 7).strip('[],')

            # The following block could be moved into IfHoliday()
            holiday = 0
            day = datetime.strptime(date.replace('-', ''), "%Y%m%d").date()
            if date in holiday_dict.keys():
                if holiday_dict[date]:
                    holiday = 2
            elif day.weekday() in [5, 6]:
                holiday = 1

            text = (name + "," + date + "," + str(tourist) + "," + str(holiday)
                    + "," + weather + "," + weather_info + "\n")
            fout.write(text)
from pathlib import Path

from preprocessing import Preprocessing
from sentiment_gui import SentimentGUI
from tfidf_sentiment import TfidfSentiment
from utils.arguments import parse_arguments
from utils.comparison import Comparison
from utils.labeling import Labeling
from utils.reader import Reader
from utils.writer import Writer

if __name__ == "__main__":
    args = parse_arguments()

    # Read articles from json
    df_articles = Reader.read_articles(args.number_of_articles)

    # Apply preprocessing
    preprocessing = Preprocessing()
    df_paragraphs = preprocessing.get_paragraphs(df_articles, overwrite=args.force_processing)

    # Calculate sentiment of paragraphs
    tfidf_sentiment = TfidfSentiment(df_paragraphs)
    tfidf_sentiment.get_context_polarity(8)
    tfidf_sentiment.calculate_sentiment_score()
    tfidf_sentiment.map_sentiment()

    # Label data
    if args.labeling is not None:
        labeling = Labeling(df_paragraphs)
def setUp(self):
    self.reader = Reader("")
def __init__(self, path: str):
    self.dataframe = Reader.read_json_to_df_default(path)
    self.tfidf_sentiment = TfidfSentiment(self.dataframe)
    # TODO = Parameters: size, board dimension, crossover
    ga.start()
    ga.report(0)


def play_with_hc(initialSate, dimension):
    hc = HillClimbing(initialSate, dimension, False)
    # TODO = Uses the initial board as the state and the board dimension;
    # pass True to restart whenever a solution is found
    hc.start()
    hc.report()


dimension = 10  # TODO = A 10 x 10 board is created
r = Reader('sample.txt')  # TODO = Sample file used to build the board
board = r.readFile()
initialSate = State(board)  # TODO = Store the board as arrays for the algorithm to work on

if PLAT_WITH == 'GA':
    play_with_hc(initialSate, dimension)
else:
    play_with_ga(dimension)

# TODO = Conclusion
# Apparently the genetic algorithm finds the best solution much faster here;
# if it gets stuck in a loop, restart the game.
def read_data_from_file(self):
    my_reader = Reader()
    w_array = my_reader.read_report("generator" + "/" + self.filename + "/"
                                    + self.filename + "Output.txt")
    return w_array
def get_correct_array(self, current_file_path):
    my_reader = Reader()
    init_array = my_reader.read(current_file_path)
    return init_array
def decode(self, buffer):
    Reader.__init__(self, buffer)
"batch_size": 20, "embedding_dims": 100, "nb_filter": 250, "filter_length": 20, "pool_length": 2, "hidden_size": 200, "nb_epoch": 50, "dropout": 0.5, "train_file": "data/train_pdtb_imp.json", "vocab_file": "data/vocab", "test_file": "", "valid_file": "data/dev_pdtb_imp.json", "vocab_size": 100000, } print str(conf) reader = Reader(conf) reader.get_full_train_data() reader.get_full_valid_data(get_id=True) features = [[[], []], [[], []]] targets = [] v_features = [[[], []], [[], []]] v_targets = [] v_id = [] v_im_features = [[[], []], [[], []]] v_im_targets = [] v_im_id = [] # for i in xrange(len(reader.train)): # features[0].append(reader.train[i][0][0]) # features[1].append(reader.train[i][0][1])
parser.add_argument('input', metavar='txt_file', type=str,
                    help='The path to txt file')
args = parser.parse_args()

model_rhyme = Rhyme()
sent = SentimentExtractor()
model_rhyme.load_model()

counter = 1
src = args.input
dest = src.replace("_txt", "_labeled")
reader = Reader(src)

# Read src file line by line
with open(src, mode="r", encoding="utf-8") as src_file:
    content = src_file.readlines()

# Open dest file
dest_file = open(dest, mode="a", encoding="utf-8")

stanza = list()
header = ""
footer = ""
time_epoch = ""
sentiment = ""
tracker = 0

# Define time epoch based on name of txt file
if ("1600" in src and "1700" in src) or ("1500" in src and "1600" in src):
def run(from_date: datetime.date, to_date: datetime.date):
    executor = Executor("")
    reader = Reader(executor)
    return back_test(reader, from_date, to_date)
parser.add_argument('input', metavar='input_folder', type=str,
                    help='The path to input folder')
args = parser.parse_args()
path = args.input

counter = 1
for (dirpath, dirnames, filenames) in os.walk(path):
    for filename in filenames:
        src = os.path.join(dirpath, filename)
        # Debug
        print("File %d at: %s" % (counter, src))
        dest = dirpath + "_txt.txt"
        try:
            reader = Reader(src)
            reader.get_poem()
            reader.convert_to_txt(src, dest)
        except Exception:
            print("Error: Skip this file")
            continue
        counter += 1