def parse(args):
    """
    Sessionize a log file row by row.

    Rows are accumulated per-IP in an ordered dict. A session is flushed to
    the output file when a new session for the same IP begins (the gap since
    the IP's last activity exceeds the inactivity timeout), when the session
    has expired relative to the newest timestamps seen, and finally at end
    of file.

    :param args: parsed arguments providing inactivityFilePath, logFilePath,
                 outPath, time_format and delimiter
    :return: None; output is written to args.outPath
    """
    timeout = get_inactivity_period(args.inactivityFilePath)
    rows = iter(get_log_data(args.logFilePath))
    next(rows, None)  # skip the header
    ip_map = OrderedDict()
    unique_timestamp = set()

    # Start from a clean output file; a missing file is fine.
    with contextlib.suppress(FileNotFoundError):
        os.remove(args.outPath)

    for i, row in enumerate(rows):
        try:
            if valiad_row(row):
                fields = row
                init_time = convert_datetime(fields[1], fields[2],
                                             args.time_format)
                unique_timestamp.add(init_time)
                ip = fields[0]
                if ip in ip_map:
                    if time_diff(init_time,
                                 ip_map[ip].get_last_session_time()) > timeout:
                        # Gap exceeded the inactivity window: close the old
                        # session and start a fresh one for this IP.
                        dump_list_to_file_as_line(ip_map[ip].output_session(),
                                                  args.outPath, args.delimiter)
                        ip_map[ip] = Session(ip, init_time)
                    else:
                        ip_map[ip].set_last_session_time(init_time)
                        ip_map[ip].increment_doc()
                else:
                    ip_map[ip] = Session(ip, init_time)

                # Once more than `timeout` distinct timestamps are in flight,
                # every session whose last activity equals the oldest seen
                # timestamp has expired and can be flushed.
                if len(unique_timestamp) - 1 > timeout:
                    oldest = min(unique_timestamp)  # hoisted: was computed per use
                    exp_ips = [
                        v.get_ip() for v in ip_map.values()
                        if v.get_last_session_time() == oldest
                    ]
                    for exp in exp_ips:
                        dump_list_to_file_as_line(
                            ip_map.pop(exp).output_session(), args.outPath,
                            args.delimiter)
                    unique_timestamp.remove(oldest)
            else:
                logging.warning('row %s has invalid data', i)
        except Exception as e:
            # Best-effort parsing: log and skip so one malformed row cannot
            # abort the whole run.
            logging.warning('row %s could not be processed because %s', i, e)

    # End of input: flush all still-open sessions in insertion order.
    for session in ip_map.values():
        dump_list_to_file_as_line(session.output_session(), args.outPath,
                                  args.delimiter)

    logging.info('Execution complete')
Beispiel #2
0
def algoseekdata(source, destination_matrix):
    """
    Rebuild the limit order book from AlgoSeek event CSVs under `source`.

    order_book is of type Book() and holds book data. ob_state is a specific
    state: a deep copy of the book matrix captured after every event row.

    NOTE(review): this variant processes only one hard-coded input file and
    never writes its accumulated results to `destination_matrix` — it looks
    like a debugging version of the full pipeline; confirm before relying
    on it.

    :param source: root directory walked for input CSV files
    :param destination_matrix: destination root (unused in this variant)
    :return: None
    """
    for subdir, dirs, files in os.walk(source):
        for file in files:
            data_path = os.path.join(subdir, file)
            with open(data_path) as csv_file:
                # Debugging filter: only this single file is processed.
                if data_path == '/rds/general/user/kk2219/home/orderbook_rebuild/data/input/raw/20190403/USM_NASDAQ.csv':
                    reader = csv.reader(csv_file, delimiter=',')
                    print(data_path)
                    order_book = Book()
                    action_multi = []  # multi-label action for the current row
                    action_single = 0  # scalar action for the current row
                    # List of matrices per time point for order book state changes
                    ob_state_list = []
                    # List of actions per time point. Last action will be 0.
                    # Action is move after current state.
                    action_multi_list = []
                    action_single_list = []
                    try:
                        # Skip the CSV header; default stops an empty file
                        # from raising an uncaught StopIteration.
                        next(reader, None)
                        for row in reader:
                            timestamp = utils.convert_datetime(row[0], row[1])
                            if row[3] in [
                                    'ADD BID', 'EXECUTE BID', 'CANCEL BID',
                                    'FILL BID', 'DELETE BID'
                            ]:
                                action_multi = order_book.action_multi_task(
                                    row[2], row[3], row[5], 1, row[6])
                                action_single = order_book.action_single_task(
                                    row[2], row[3], row[5], 1)
                                order_book.bid_split(
                                    row[4], row[2], row[6], row[5], timestamp,
                                    config.algoseek_dict.get(row[3]), row[7],
                                    row[3])
                            elif row[3] in [
                                    'ADD ASK', 'EXECUTE ASK', 'CANCEL ASK',
                                    'FILL ASK', 'DELETE ASK'
                            ]:
                                action_multi = order_book.action_multi_task(
                                    row[2], row[3], row[5], 2, row[6])
                                action_single = order_book.action_single_task(
                                    row[2], row[3], row[5], 2)
                                order_book.ask_split(
                                    row[4], row[2], row[6], row[5], timestamp,
                                    config.algoseek_dict.get(row[3]), row[7],
                                    row[3])

                            # Snapshot the book after applying the event.
                            ob_state = copy.deepcopy(
                                order_book.store_lob_matrix)
                            ob_state_list.append(ob_state)
                            action_multi_list.append(action_multi)
                            action_single_list.append(action_single)
                    except IOError:
                        # Report the file actually being read, not sys.argv[1]
                        # (which may not even exist when called as a library).
                        print('Cannot open input file "%s"' % data_path)
                        sys.exit(1)
Beispiel #3
0
def algoseekdata(source, destination_matrix):
    """
    Rebuild the limit order book from every AlgoSeek event CSV under `source`
    and save the per-file results below `destination_matrix`.

    order_book is of type Book() and holds book data. ob_state is a specific
    state: a deep copy of the book matrix captured after every event row.
    Per input file, three arrays are saved:
      X: book-state matrices per time point,
      Y: multi-label actions per time point,
      Z: scalar actions per time point.

    :param source: root directory walked for input CSV files
    :param destination_matrix: pathlib-compatible destination root
    :return: None; arrays are written with np.save
    """
    for subdir, dirs, files in os.walk(source):
        for file in files:
            data_path = os.path.join(subdir, file)
            with open(data_path) as csv_file:
                reader = csv.reader(csv_file, delimiter=',')
                print(data_path)
                order_book = Book()
                action_multi = []  # multi-label action for the current row
                action_single = 0  # scalar action for the current row
                # List of matrices per time point for order book state changes
                ob_state_list = []
                # List of actions per time point. Last action will be 0.
                # Action is move after current state.
                action_multi_list = []
                action_single_list = []
                try:
                    # Skip the CSV header; default stops an empty file from
                    # raising an uncaught StopIteration.
                    next(reader, None)
                    for row in reader:
                        timestamp = utils.convert_datetime(row[0], row[1])
                        if row[3] in [
                                'ADD BID', 'EXECUTE BID', 'CANCEL BID',
                                'FILL BID', 'DELETE BID'
                        ]:
                            action_multi = order_book.action_multi_task(
                                row[2], row[3], row[5], 1, row[6])
                            action_single = order_book.action_single_task(
                                row[2], row[3], row[5], 1)
                            order_book.bid_split(
                                row[4], row[2], row[6], row[5], timestamp,
                                config.algoseek_dict.get(row[3]), row[7],
                                row[3])
                        elif row[3] in [
                                'ADD ASK', 'EXECUTE ASK', 'CANCEL ASK',
                                'FILL ASK', 'DELETE ASK'
                        ]:
                            action_multi = order_book.action_multi_task(
                                row[2], row[3], row[5], 2, row[6])
                            action_single = order_book.action_single_task(
                                row[2], row[3], row[5], 2)
                            order_book.ask_split(
                                row[4], row[2], row[6], row[5], timestamp,
                                config.algoseek_dict.get(row[3]), row[7],
                                row[3])

                        # Snapshot the book after applying the event.
                        ob_state = copy.deepcopy(order_book.store_lob_matrix)
                        ob_state_list.append(ob_state)
                        action_multi_list.append(action_multi)
                        action_single_list.append(action_single)

                    # Output paths mirror the input's parent directory name:
                    # <dest>/<parent>/{X,Y,Z}/<input-stem>
                    X_file_name = destination_matrix / pathlib.PurePath(os.path.normpath(data_path)).parent.name / 'X' / \
                                os.path.splitext(pathlib.PurePath(data_path).name)[0]
                    Y_file_name = destination_matrix / pathlib.PurePath(os.path.normpath(data_path)).parent.name / 'Y' / \
                                os.path.splitext(pathlib.PurePath(data_path).name)[0]
                    Z_file_name = destination_matrix / pathlib.PurePath(os.path.normpath(data_path)).parent.name / 'Z' / \
                                os.path.splitext(pathlib.PurePath(data_path).name)[0]

                    utils.make_dir(X_file_name)
                    utils.make_dir(Y_file_name)
                    utils.make_dir(Z_file_name)

                    np.save(X_file_name, ob_state_list)
                    np.save(Y_file_name, action_multi_list)
                    np.save(Z_file_name, action_single_list)
                except IOError:
                    # Report the file actually being read, not sys.argv[1]
                    # (which may not even exist when called as a library).
                    print('Cannot open input file "%s"' % data_path)
                    sys.exit(1)
Beispiel #4
0
 def update_data(self, course, answers):
     """Record first-time subtask solutions and score them per user."""
     for taskid, subtaskid, _, userid, raw_time, correct in answers.answers:
         solved_at = utils.convert_datetime(raw_time)
         # Already recorded for this user/subtask — nothing to do.
         if self.submits[userid][subtaskid]:
             continue
         self.post_solution(userid, taskid, solved_at)
         self.score_task(userid, taskid, subtaskid, correct)
Beispiel #5
0
 def _problem_submitted(self, item):
     """Handle a problem-submission event: update the course, register the
     task on its page, and record the user's solution time."""
     self._update_course(item)
     extracted = get_items(
         item, ['context.user_id', 'event.problem_id', 'referer', 'time'])
     user_id, problem_id, page, time = extracted
     self.modules.add_task(page, problem_id)
     self.users.post_solution(user_id, problem_id, convert_datetime(time))
Beispiel #6
0
 def test_convert_datetime(self):
     """convert_datetime renders ISO timestamps as 'DD.MM.YYYY HH:MM:SS'."""
     for raw, expected in (
             ('2018-03-03T16:00:14.5678', '03.03.2018 16:00:14'),
             ('2018-03-17T01:59:14.5678+0000', '17.03.2018 01:59:14'),
     ):
         self.assertEqual(t.convert_datetime(raw), expected)