Example #1
    def __init__(self, filename: str):
        self.basename = os.path.splitext(filename)[0]

        with open(f'sc/{filename}', 'rb') as fh:
            buffer = fh.read()

        decompressor = Decompressor()
        buffer = decompressor.decompress(buffer)

        Reader.__init__(self, buffer, 'little')

        self.is_texture = self.basename.endswith('_tex')
        if self.is_texture:
            if not os.path.exists('png'):
                os.mkdir('png')

        self.shape_count: int = 0
        self.clips_count: int = 0
        self.textures_count: int = 0
        self.text_fields_count: int = 0
        self.matrix_count: int = 0
        self.color_transformations_count: int = 0

        self.shapes: list = []
        self.clips: list = []
        self.textures: list = []
        self.text_fields: list = []
        self.matrix: list = []
        self.color_transformations: list = []

        self.exports: list = []
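The Reader base class that this constructor hands the decompressed buffer to (Reader.__init__(self, buffer, 'little')) is not shown. Below is a minimal sketch of a little-endian byte-buffer reader with that constructor shape; it is purely an assumption about what such a base class typically provides, not the project's actual API (Example #17 further down calls methods like readUInt32, readInt32 and readString on a similar reader).

import struct


class Reader:
    def __init__(self, buffer: bytes, endian: str = 'little'):
        self.buffer = buffer
        self.offset = 0
        self.prefix = '<' if endian == 'little' else '>'

    def read(self, fmt: str):
        # Unpack one value at the current offset and advance past it.
        value = struct.unpack_from(self.prefix + fmt, self.buffer, self.offset)[0]
        self.offset += struct.calcsize(fmt)
        return value

    def readUInt16(self) -> int:
        return self.read('H')

    def readInt32(self) -> int:
        return self.read('i')


# Reader(b'\x01\x00', 'little').readUInt16() -> 1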
Example #2
def main():
    lib = Library()
    user1 = Reader("Djinn", "Co", 25)
    user2 = Reader("Artem", "KFC", 48)
    user3 = Reader("Suga", "Rogue", 35)

    book1 = Book("Ubik", "Philip k. Dick", 1969)
    book2 = Book("The Last Wish", "Andrzej Sapkowski", 2007)
    book3 = Book("It", "Stephen King", 1986)
    print(' ')
    lib.add_book_to_lib(book1)
    lib.add_book_to_lib(book2)
    lib.add_book_to_lib(book3)
    print(' ')
    lib.add_user_to_readerslist(user1)
    lib.add_user_to_readerslist(user2)
    lib.add_user_to_readerslist(user3)
    print(' ')

    time.sleep(3)
    lib.give_book_to_user(user1, book1)
    lib.give_book_to_user(user3, book1)
    time.sleep(3)
    print(' ')
    lib.show_books(available=True)
    print(' ')
    lib.show_books(available=False)
    print(' ')
    lib.sort_books('year')
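The Library, Book, and Reader classes that this main() wires together are not part of the example. A minimal hypothetical sketch with just the attributes and methods the calls above assume; every name and behaviour here is a guess, not the original project's code:

class Reader:
    def __init__(self, name, surname, age):
        self.name, self.surname, self.age = name, surname, age


class Book:
    def __init__(self, title, author, year):
        self.title, self.author, self.year = title, author, year
        self.available = True


class Library:
    def __init__(self):
        self.books, self.readers = [], []

    def add_book_to_lib(self, book):
        self.books.append(book)

    def add_user_to_readerslist(self, user):
        self.readers.append(user)

    def give_book_to_user(self, user, book):
        # Hand out a book only if nobody else already holds it.
        if book.available:
            book.available = False

    def show_books(self, available=True):
        for book in self.books:
            if book.available == available:
                print(book.title, book.author, book.year)

    def sort_books(self, key):
        self.books.sort(key=lambda b: getattr(b, key))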
Example #3
class ReaderBase(TestCase):
    result = None
    reader = None

    read_fixture = [
        'type, cash, price, bonus_ratio',
        '"white",15, 2, 3',
        '"milk", 12, 2, 5',
        '"dark", 13, 4, 1',
        '"white", 6, 2, 2',
        '"bozo", 3, 2, 1',
    ]

    expected_result = [['"white"', '15', ' 2', ' 3'],
                       ['"milk"', ' 12', ' 2', ' 5'],
                       ['"dark"', ' 13', ' 4', ' 1'],
                       ['"white"', ' 6', ' 2', ' 2'],
                       ['"bozo"', ' 3', ' 2', ' 1']]

    def setUp(self):
        self.reader = Reader("")

    @patch("utils.reader.Reader.read")
    def test_read_and_split(self, read):
        read.return_value = self.read_fixture
        self.assertEqual(self.reader.result(), self.expected_result)

    @patch("utils.reader.Reader.read")
    def test_split(self, read):
        read.return_value = ['Skip heading..\n', 'valid,data\n']

        expected = [['valid', 'data']]
        self.assertEqual(self.reader.split(), expected)
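The utils.reader.Reader methods under test (read, split, result) are not included in the example. The following sketch shows bodies that would satisfy the two assertions above; it is inferred from the fixture and expected values and is therefore only a guess at the real implementation:

class Reader:
    def __init__(self, path):
        self.path = path

    def read(self):
        # The real method would read lines from self.path; the tests patch it
        # to return a fixture instead, so the file is never touched there.
        with open(self.path) as fh:
            return fh.readlines()

    def split(self):
        # Skip the heading line, then split every remaining line on commas.
        return [line.rstrip('\n').split(',') for line in self.read()[1:]]

    def result(self):
        return self.split()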
Example #4
    def testMakeobjectsfromxml(self):
        reader = Reader()
        soup = reader.readfile()
        threads = reader.makeobjectsfromxml(soup)
        for thread in threads:
            for document in thread._documents:
                print(document.text)
Example #5
    def calculate(
        self,
        source_code: str,
        priority_code: str,
        buy_price_code: str,
        sell_price_code: str,
        market: Market,
        start_date: date,
        end_date: date,
    ):
        logging.debug(
            f"{TAG} Start calculate start_date = {start_date}, end_date = {end_date}"
        )
        compile_result = self.compiler.compile(source_code, priority_code,
                                               buy_price_code, sell_price_code)

        executor = Executor("")
        reader = Reader(executor)
        field_list = list(compile_result.fields)
        required_field_list = [Field.open, Field.close, Field.is_active]
        for field in required_field_list:
            if field not in field_list:
                field_list.append(field)
        rows = reader.get_simulating_data(
            Universe.total, field_list, start_date, end_date,
            [Field.open, Field.close, Field.ticker_id, Field.low, Field.high])

        if len(rows) == 0:
            return None

        now = time.time()
        total_df = pd.DataFrame(rows)
        total_job_count = len(compile_result.item_list)
        completed_job_count = 0

        for item in compile_result.item_list:
            # TODO: keep checking that the is_rank value is set correctly, and verify it
            # Rank functions operate on a per-date basis
            if item.is_rank:
                y = total_df.groupby("date", as_index=False).apply(
                    lambda df: self._calculate(item.code, df))
            else:  # Functions other than rank functions operate per ticker
                y = total_df.groupby("ticker_id", as_index=False).apply(
                    lambda df: self._calculate(item.code, df))

            different_columns = total_df.columns.symmetric_difference(
                y.columns)
            for column in different_columns:
                total_df.insert(0, column, y[column])
            completed_job_count += 1
            self.progress = Decimal(completed_job_count / total_job_count)

        logging.debug("{} execute time : {:0.3f}s".format(
            TAG,
            time.time() - now))
        return total_df
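The per-item branch above applies _calculate either per date (rank items) or per ticker (everything else) through groupby(...).apply and then grafts the resulting columns onto total_df. Here is a small self-contained illustration of that groupby/apply pattern on toy data, with trivial scoring functions standing in for the compiled item code:

import pandas as pd

total_df = pd.DataFrame({
    "date": ["2021-01-04", "2021-01-04", "2021-01-05", "2021-01-05"],
    "ticker_id": ["A", "B", "A", "B"],
    "close": [10.0, 20.0, 11.0, 19.0],
})

# Rank-style item: operates per date (cross-sectional rank of close prices).
per_date = total_df.groupby("date", as_index=False).apply(
    lambda df: df.assign(close_rank=df["close"].rank(ascending=False)))

# Non-rank item: operates per ticker (running mean of close over time).
per_ticker = total_df.groupby("ticker_id", as_index=False).apply(
    lambda df: df.assign(close_mean=df["close"].expanding().mean()))

print(per_date)
print(per_ticker)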
Example #6
    def test_final(self):
        reader = Reader()
        soup = reader.readfile()
        threads = reader.makeobjectsfromxml(soup)
        tokenizer = Tokenizer(threads)
        collection_tokenized = tokenizer.tokenize()
        coll_model = CollectionModel(collection_tokenized)
        doc_model = DocumentModel(collection_tokenized)
        ret_model = RetrievalModel(collection_tokenized, doc_model, coll_model)
        ret_model.calculate_relevance()
Example #7
    def testtokenizerfromfile(self):
        reader = Reader()
        soup = reader.readfile()
        threads = reader.makeobjectsfromxml(soup)
        tokenizer = Tokenizer(threads)
        threads_tokenized = tokenizer.tokenize()
        for thread in threads_tokenized:
            print(thread._query._body)
            for document in thread._documents:
                print(document._text)
Example #8
    def testdocumentmodel(self):
        reader = Reader()
        soup = reader.readfile()
        threads = reader.makeobjectsfromxml(soup)
        tokenizer = Tokenizer(threads)
        threads_tokenized = tokenizer.tokenize()
        collection_model = CollectionModel(threads_tokenized)
        freq_collection = collection_model.calculate_frequency()
        print(freq_collection)
        document_model = DocumentModel(threads_tokenized)
        freq_document = document_model.calculate_frequency()
        print(freq_document)
Example #9
def train():
    print("training...")
    reader = Reader()
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()
        with tf.device('/cpu:0'):
            images, boxes = reader.distorted_inputs(FLAGS.train_dir,
                                                    FLAGS.batch_size)

        logits = None  #TODO
        loss = None  #TODO
        train_op = None  #TODO

        class _LoggerHook(tf.train.SessionRunHook):
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    #loss_value = run_values.results
                    loss_value = .1
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)
                    format_str = (
                        '%s: step: %d, loss = %.2f (%.1f examples/sec, %.3f sec/batch)'
                    )
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_state_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:

            while not mon_sess.should_stop():
                mon_sess.run(train_op)
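The three TODO placeholders above (logits, loss, train_op) leave the MonitoredTrainingSession skeleton non-runnable. Under the same TF1 API, a dummy stand-in graph like the following, a single trainable scalar with a quadratic loss that is purely an assumption in place of the real detection model, shows the minimum needed for the hooks and the training loop to work:

import tensorflow as tf  # TensorFlow 1.x API, as in the example above

with tf.Graph().as_default():
    global_step = tf.train.get_or_create_global_step()

    # Dummy stand-ins for loss/train_op: one trainable scalar pulled toward
    # zero by a quadratic loss.
    w = tf.get_variable("w", shape=[], initializer=tf.constant_initializer(5.0))
    loss = tf.reduce_mean(tf.square(w))
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(
        loss, global_step=global_step)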
Example #10
    def run_pipeline(self):
        customer_order = CustomerOrder()

        read = Reader(self._file)
        line_order = ValidateInput()

        self._line_item = []

        for order in read.result():
            line_order.set(order)
            if line_order.valid_types:
                customer_order.order_type = line_order.value[0]
                customer_order.cash = line_order.value[1]
                customer_order.price = line_order.value[2]
                customer_order.bonus_ratio = line_order.value[3]
                self._line_item.append(customer_order.final_order)
Example #11
    def _get_preprocessed_df(
        self, preprocessed_filename: str, df_articles: DataFrame, document_type: DocumentType, overwrite: bool
    ) -> DataFrame:
        """
        Helper function to get the preprocessed pandas dataframe. If the preprocessing already was done ones (JSON files
        exist) the tagging is not done again but the json files with the perprocessing are read into a pandas dataframe.
        If preprocessing is proceeded, the result will be stored in a json file. According to the document type, a
        different preprocessing is done.
        :param preprocessed_filename: Name of json file to store/ read the results of preprocessing.
        :param df_articles: Dataframe with the text to preprocess, if the data still needs to be preprocessed.
        :param document_type: Type of the document that is going to be preprocessed.
        :param overwrite: Determines if the previous data is allowed to be overwritten.
        :return: df_preprocessed: Pandas dataframe of the preprocessed input.
        """
        json_path = "src/output/" + preprocessed_filename + ".json"

        if Path(json_path).exists() and not overwrite:
            return Reader.read_json_to_df_default(json_path)

        if document_type.value == DocumentType.ARTICLE.value:
            df_preprocessed = self._apply_preprocessing(df_articles, document_type, FilterType.PARTIES)
        elif document_type.value == DocumentType.PARAGRAPH.value:
            df_preprocessed = self._preprocess_paragraphs(df_articles)
        else:
            df_articles = df_articles[["title", "media"]].rename(columns={"title": "text"})
            df_preprocessed = self._apply_preprocessing(df_articles, document_type, FilterType.NONE)

        return df_preprocessed
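Reader.read_json_to_df_default is only called here, not defined. If it is a thin wrapper over pandas' JSON loader, a minimal stand-in (an assumption, since the real utils.reader code is not shown) could be:

import pandas as pd
from pandas import DataFrame


class Reader:
    @staticmethod
    def read_json_to_df_default(path: str) -> DataFrame:
        # Load a previously written JSON file back into a dataframe.
        return pd.read_json(path)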
Example #12
    def ingest_data(self, filepath, dataset):
        """
        load data from a file to a dataframe and store it on the db

         Parameters
        ----------
        filepath : String
            file path of the .csv file for the dataset
        dataset: DataSet
            The DataSet object that holds the Session ID for HoloClean
        Returns
        -------
        No Return
        """
        # Spawn new reader and load data into dataframe
        fileReader = Reader(self.holoEnv.spark_session)
        df = fileReader.read(filepath)

        # Store dataframe to DB table
        schema = df.schema.names
        name_table = self._add_info_to_meta('Init', schema, dataset)
        self._dataframe_to_table(name_table, df)
        table_attribute_string = self.get_schema(dataset, "Init")
        count = 0
        map_schema = []
        attributes = table_attribute_string.split(',')
        for attribute in attributes:
            if attribute != "index":
                count = count + 1
                map_schema.append([count, attribute])

        dataframe_map_schema = self.holoEnv.spark_session.createDataFrame(
            map_schema,
            StructType([
                StructField("index", IntegerType(), False),
                StructField("attribute", StringType(), True)
            ]))
        self.add_db_table('Map_schema', dataframe_map_schema, dataset)

        for schema_tuple in map_schema:
            self.attribute_map[schema_tuple[1]] = schema_tuple[0]
        return
Example #13
    def ingest_data(self, filepath, dataset):
        """
        Load data from a file to a dataframe and store it on the db

        filepath : String
            File path of the .csv file for the dataset
        dataset: DataSet
            The DataSet object that holds the Session ID for HoloClean

        """

        # Spawn new reader and load data into dataframe
        filereader = Reader(self.holo_env.spark_session)

        # read with an index column
        df = filereader.read(filepath, 1)

        # Store dataframe to DB table
        schema = df.schema.names
        name_table = dataset.table_specific_name('Init')
        self.dataframe_to_table(name_table, df)
        dataset.attributes['Init'] = schema
        count = 0
        map_schema = []
        attribute_map = {}
        for attribute in schema:
            if attribute != GlobalVariables.index_name:
                count = count + 1
                map_schema.append([count, attribute])
                attribute_map[attribute] = count

        dataframe_map_schema = self.holo_env.spark_session.createDataFrame(
            map_schema, dataset.attributes['Map_schema'])
        self.add_db_table('Map_schema', dataframe_map_schema, dataset)

        for table_tuple in map_schema:
            self.attribute_map[table_tuple[1]] = table_tuple[0]

        return df, attribute_map
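Both ingest_data variants number every attribute except the index column to build the Map_schema table. The core of that mapping, isolated on a toy schema so the resulting structures are easy to see (the schema values here are made up):

schema = ["index", "name", "city", "zip"]

count = 0
map_schema = []
attribute_map = {}
for attribute in schema:
    if attribute != "index":
        count += 1
        map_schema.append([count, attribute])
        attribute_map[attribute] = count

print(map_schema)     # [[1, 'name'], [2, 'city'], [3, 'zip']]
print(attribute_map)  # {'name': 1, 'city': 2, 'zip': 3}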
Example #14
def main():
    args = parse_args()
    create_dirs(args.model_name, [args.checkpoint_dir, args.log_dir])

    sess = tf.Session()

    logger = Logger(args, sess)
    model = Model(args, logger)
    reader = Reader(args, sess, logger)

    if args.action == 'train':
        trainer = Trainer(sess, model, reader, args, logger)
        trainer.train()
    else:
        predictor = Estimator(sess, model, reader, args, logger)
        predictor.predict()
Example #15
    def render_level(level, canvas):
        data = Reader.read_level(level)
        brick_dict = {}
        y1, y2 = 30, 60
        for idx, rw in enumerate(data):
            x1, x2 = 0, int(canvas.SCREENWIDTH * 0.8 / 10)
            for brk in rw[1:]:
                if brk:
                    brick = Brick(x1, y1, x2, y2, color=brk, resistance=rw[0])
                    brick.render_brick(canvas)
                    brick_dict[id(brick)] = brick
                else:
                    pass

                x1 += int(canvas.SCREENWIDTH * 0.8 / 10)
                x2 += int(canvas.SCREENWIDTH * 0.8 / 10)

            y1 += 30
            y2 += 30
        return brick_dict
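render_level spaces the bricks over ten columns whose width is 80% of the screen width divided by 10. A quick worked example of the coordinates this produces, using an assumed SCREENWIDTH of 800:

SCREENWIDTH = 800  # assumed value for illustration only
step = int(SCREENWIDTH * 0.8 / 10)  # 64 px per brick column

x1, x2 = 0, step
columns = []
for _ in range(10):
    columns.append((x1, x2))
    x1 += step
    x2 += step

print(step)                     # 64
print(columns[0], columns[-1])  # (0, 64) (576, 640)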
Example #16
loss_function = configs.loss
hidden = configs.hidden
reg = configs.reg
n_neg_samples = configs.n_neg_samples
dropout = configs.dropout

if configs.debug:
    print(
        "loaded parameters dataset_name: %s, bern: %s, epochs: %d, batch_size: %d, learning_rate: %f, dim: %d, margin: %f, lr_decay: %f, loss_function: %s, hidden: %s"
        % (dataset_name, bern, epochs, batch_size, learning_rate, dim, margin,
           lr_decay, loss_function, hidden))

device = torch.device("cuda")
os.environ["CUDA_VISIBLE_DEVICES"] = gpu

reader = Reader(configs)

n_train = reader.n_train
n_ent = reader.n_ent
n_rel = reader.n_rel
stat = reader.stat
corrupter = Corrupter(configs, n_ent, stat)


def load_model(model_name):
    loaded_dict = torch.load(
        os.path.join(configs.save_path, model_name + ".mdl"))
    if model_name == "TransE":
        model = TransE(loaded_dict["configs"], n_ent, n_rel)
    else:
        model = ComplEx(loaded_dict["configs"], n_ent, n_rel)
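load_model expects the .mdl file to hold a dict with at least a "configs" entry alongside whatever the model needs to restore its weights. A hedged sketch of the matching save side; the "state_dict" key and the save_model name are assumptions, only the "configs" key is implied by the loader above:

import os

import torch


def save_model(model, configs, model_name):
    # Persist the hyper-parameters together with the weights so load_model
    # can rebuild the right architecture before restoring the state dict.
    torch.save(
        {"configs": configs, "state_dict": model.state_dict()},
        os.path.join(configs.save_path, model_name + ".mdl"),
    )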
Example #17
    def get_response(self):
        header = self.s.recv(7)
        packet_length = int.from_bytes(header[2:5], 'big')
        data = self.recvall(self.s, packet_length)
        r = Reader(data)
        code = r.readUInt32()

        if code == 7 or code == 8:
            self.fingerprint = json.loads(r.readFinger())  # fingerprint
            r.readInt32()
            r.readShort()
            self.assets_url = r.readString()  # assets url
            r.skip(23)
            r.readString()
            r.skip(2)
            r.readString()

            d = Downloader(self.fingerprint, self.assets_url)
            d.download()
        else:
            _(f"Recived code {code} - returning!")
            return
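The packet length above is decoded from bytes 2..4 of the 7-byte header as a big-endian integer; a short standalone illustration of that decoding step (the header bytes are made up):

header = b'\x1f\x00\x00\x01\x02\x00\x00'  # fabricated 7-byte header

# Bytes 2..4 carry the payload length, big-endian.
packet_length = int.from_bytes(header[2:5], 'big')
print(packet_length)  # 258  (0x00 << 16 | 0x01 << 8 | 0x02)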
Example #18
    def create_data2(self,
                     year,
                     tourist_file=TOURIST_FILE,
                     weather_file=WEATHER_FILE,
                     weather_file2=WEATHER_FILE2):
        """
        输入年份会根据tourist_file和weather_file中的数据构造数据集
        :param year: 数据年份,数值类型
        :param tourist_file: 客流量数据文件,默认路径TOURIST_FILE
        :param weather_file:  天气数据文件,默认路径WEATHER_FILE
        :param weather_file2: 天气详细数据文件,默认路径WEATHER_FILE2
        :return:
        """
        tourist_file = tourist_file.replace('YYYY', str(year))
        weather_file = weather_file.replace('YYYY', str(year))
        weather_out = self.WEATHER_OUT.replace('YYYY', str(year))
        holiday_out = self.HOLIDAY_OUT.replace('YYYY', str(year))
        file_out = self.FILE_OUT.replace('YYYY', str(year))

        if not os.path.isfile(weather_out):
            Reader().get_weather(weather_file, weather_out)
        weather_dict = Reader().read_weather(weather_out)
        weather_dict2 = Reader().read_weather2(weather_file2)

        # Fetch the year's holiday data from the API and store it in holiday_out to avoid repeated HTTP requests
        if not os.path.isfile(holiday_out):
            IfHoliday().get_year_holiday(year, holiday_out)
        holiday_dict = Reader().read_holiday(holiday_out)

        tourist_dict = Reader().read_tourist(
            tourist_file)  # Using a dict may break the date ordering, which hurts feature-engineering accuracy

        with codecs.open(file_out, 'a+', 'utf-8') as fout:
            fout.write(
                "scenic_area,date,tourist,holiday,weather,min_temperature,max_temperature,mean_temperature,"
                + "humidity,wind_speed,precipitation,cloudage" + "\n")
            for date, tourist in tourist_dict.items():
                name = "上饶灵山景区"  #景区名称

                weather = weather_dict[date][0]
                # max_temperature = weather_dict[date][1]
                # min_temperature = weather_dict[date][2]
                if date in weather_dict2.keys():
                    weather_info = str(weather_dict2[date]).strip('[]')
                else:
                    weather_info = str("None," * 7).strip('[],')
                '''The following block could be moved into IfHoliday()'''
                holiday = 0
                day = datetime.strptime(date.replace('-', ''), "%Y%m%d").date()
                if date in holiday_dict.keys():
                    if holiday_dict[date]:
                        holiday = 2
                elif day.weekday() in [5, 6]:
                    holiday = 1

                text = name + "," + date + "," + str(tourist) + "," + str(
                    holiday) + "," + weather + "," + weather_info + "\n"
                fout.write(text)
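The holiday column written to the CSV uses a three-value scheme: 2 for a holiday confirmed by the API data, 1 for a plain weekend, 0 otherwise. The same branch, extracted into a small standalone function with made-up inputs:

from datetime import datetime


def holiday_flag(date_str, holiday_dict):
    # Mirrors the logic above: a holiday_dict entry takes precedence over the
    # weekend check, and a falsy entry yields 0.
    day = datetime.strptime(date_str.replace('-', ''), "%Y%m%d").date()
    holiday = 0
    if date_str in holiday_dict:
        if holiday_dict[date_str]:
            holiday = 2
    elif day.weekday() in [5, 6]:
        holiday = 1
    return holiday


print(holiday_flag("2019-10-01", {"2019-10-01": True}))  # 2
print(holiday_flag("2019-10-12", {}))                    # 1 (a Saturday)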
Example #19
from pathlib import Path

from preprocessing import Preprocessing
from sentiment_gui import SentimentGUI
from tfidf_sentiment import TfidfSentiment
from utils.arguments import parse_arguments
from utils.comparison import Comparison
from utils.labeling import Labeling
from utils.reader import Reader
from utils.writer import Writer

if __name__ == "__main__":
    args = parse_arguments()

    # Read articles from json
    df_articles = Reader.read_articles(args.number_of_articles)

    # Apply preprocessing
    preprocessing = Preprocessing()
    df_paragraphs = preprocessing.get_paragraphs(
        df_articles, overwrite=args.force_processing)

    # Calculate sentiment of paragraphs
    tfidf_sentiment = TfidfSentiment(df_paragraphs)
    tfidf_sentiment.get_context_polarity(8)
    tfidf_sentiment.calculate_sentiment_score()
    tfidf_sentiment.map_sentiment()

    # Label data
    if args.labeling is not None:
        labeling = Labeling(df_paragraphs)
Example #20
    def setUp(self):
        self.reader = Reader("")
Example #21
    def __init__(self, path: str):
        self.dataframe = Reader.read_json_to_df_default(path)
        self.tfidf_sentiment = TfidfSentiment(self.dataframe)
Example #22
    # TODO = Parameters: size, board dimension, transversal
    ga.start()
    ga.report(0)


def play_with_hc(initialSate, dimension):
    hc = HillClimbing(initialSate, dimension, False)
    # TODO = Using the initial board as the state and the board dimension;
    #  set True if you want to restart once the solution is found
    hc.start()
    hc.report()


dimension = 10
# TODO = A 10 x 10 board is created
r = Reader('sample.txt')
# TODO = Sample file used to create the board
board = r.readFile()
initialSate = State(board)
# TODO = Put the board into arrays so the algorithm can work with it

if PLAT_WITH == 'GA':
    play_with_hc(initialSate, dimension)
else:
    play_with_ga(dimension)

# TODO = Conclusion
#  Apparently the genetic algorithm runs much faster here.
#  The logic for finding the best solution: in case there is a loop,
#  restart the game
Example #23
    def read_data_from_file(self):
        my_reader = Reader()
        w_array = my_reader.read_report("generator" + "/" + self.filename + "/" + self.filename + "Output.txt")
        return w_array
Example #24
    def get_correct_array(self, current_file_path):
        my_reader = Reader()
        init_array = my_reader.read(current_file_path)
        return init_array
Example #25
    def decode(self, buffer):
        Reader.__init__(self, buffer)
Example #26
    "batch_size": 20,
    "embedding_dims": 100,
    "nb_filter": 250,
    "filter_length": 20,
    "pool_length": 2,
    "hidden_size": 200,
    "nb_epoch": 50,
    "dropout": 0.5,
    "train_file": "data/train_pdtb_imp.json",
    "vocab_file": "data/vocab",
    "test_file": "",
    "valid_file": "data/dev_pdtb_imp.json",
    "vocab_size": 100000,
}
print(str(conf))
reader = Reader(conf)
reader.get_full_train_data()
reader.get_full_valid_data(get_id=True)

features = [[[], []], [[], []]]
targets = []
v_features = [[[], []], [[], []]]
v_targets = []
v_id = []
v_im_features = [[[], []], [[], []]]
v_im_targets = []
v_im_id = []

# for i in xrange(len(reader.train)):
#     features[0].append(reader.train[i][0][0])
#     features[1].append(reader.train[i][0][1])
Example #27
parser.add_argument('input',
                    metavar='txt_file',
                    type=str,
                    help='The path to txt file')

args = parser.parse_args()

model_rhyme = Rhyme()
sent = SentimentExtractor()
model_rhyme.load_model()
counter = 1

src = args.input
dest = src.replace("_txt", "_labeled")
reader = Reader(src)

# Read src file line by line
with open(src, mode="r", encoding="utf-8") as src_file:
    content = src_file.readlines()
# Open dest file
dest_file = open(dest, mode="a", encoding="utf-8")
stanza = list()
header = ""
footer = ""
time_epoch = ""
sentiment = ""
tracker = 0

# Define time epoch based on name of txt file
if ("1600" in src and "1700" in src) or ("1500" in src and "1600" in src):
Example #28
def run(from_date: datetime.date, to_date: datetime.date):
    executor = Executor("")
    reader = Reader(executor)
    return back_test(reader, from_date, to_date)
Example #29
parser.add_argument('input',
                    metavar='input_folder',
                    type=str,
                    help='The path to input folder')

args = parser.parse_args()

path = args.input

counter = 1

for (dirpath, dirnames, filenames) in os.walk(path):
    for filename in filenames:
        src = os.path.join(dirpath, filename)

        # Debug
        print("File %d at: %s" % (counter, src))

        dest = dirpath + "_txt.txt"

        try:
            reader = Reader(src)
            reader.get_poem()
            reader.convert_to_txt(src, dest)
        except:
            print("Error: Skip this file")
            continue

        counter += 1