Example #1
def scan_sampling_iters(fd: TextIO, config_dict: Dict[str, Any],
                        lineno: int) -> int:
    """
    Parse sampling iterations and save the iteration count to config_dict.
    """
    draws_found = 0
    num_cols = len(config_dict['column_names'])
    cur_pos = fd.tell()
    line = fd.readline().strip()
    while len(line) > 0 and not line.startswith('#'):
        lineno += 1
        draws_found += 1
        data = line.split(',')
        if len(data) != num_cols:
            raise ValueError(
                'line {}: bad draw, expecting {} items, found {}\n'.format(
                    lineno, num_cols, len(data)) +
                'This error could be caused by running out of disk space.\n'
                'Try clearing up TEMP or setting output_dir to a path'
                ' on another drive.')
        cur_pos = fd.tell()
        line = fd.readline().strip()
    config_dict['draws_sampling'] = draws_found
    fd.seek(cur_pos)
    return lineno
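All of these scan_* helpers lean on the same peek-and-rewind idiom: remember tell() before each readline(), then seek() back once a line belonging to the next section shows up, so the following scanner starts exactly at the section boundary. A minimal sketch of just that idiom, using a hypothetical consume_while helper that is not part of the original module:

from typing import Callable, TextIO

def consume_while(fd: TextIO, pred: Callable[[str], bool]) -> int:
    """Advance fd past lines matching pred; stop at the first non-match."""
    count = 0
    cur_pos = fd.tell()
    line = fd.readline()
    while line and pred(line.strip()):
        count += 1
        cur_pos = fd.tell()
        line = fd.readline()
    fd.seek(cur_pos)  # rewind over the line that ended the loop
    return count

With such a helper, scan_sampling_iters reduces to counting the lines that are non-empty and not comments.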
Example #2
def get_file_size(f: typing.TextIO) -> int:
	""" Gets file size. This function restores the file position. """
	restore = f.tell()
	f.seek(0, 2)  # 2 == os.SEEK_END
	ret = f.tell()
	f.seek(restore)
	return ret
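A quick sanity check for this pattern with an in-memory buffer (not part of the original source):

import io

f = io.StringIO("hello\nworld\n")
f.read(3)                  # move the cursor somewhere mid-stream
before = f.tell()
assert get_file_size(f) == 12
assert f.tell() == before  # position was restored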
Example #3
def get_file_size(file: TextIO) -> int:
    """Get size of file in bytes without altering file"""
    starting_pos = file.tell()
    file.seek(0, os.SEEK_END)
    file_size = file.tell()
    file.seek(starting_pos)
    return file_size
Example #4
def scan_config(fd: TextIO, config_dict: Dict[str, Any], lineno: int) -> int:
    """
    Scan the initial stan_csv comment lines and
    save non-default configuration information to config_dict.
    """
    cur_pos = fd.tell()
    line = fd.readline().strip()
    while len(line) > 0 and line.startswith('#'):
        lineno += 1
        if line.endswith('(Default)'):
            line = line.replace('(Default)', '')
        line = line.lstrip(' #\t')
        key_val = line.split('=')
        if len(key_val) == 2:
            if key_val[0].strip() == 'file' and not key_val[1].endswith('csv'):
                config_dict['data_file'] = key_val[1].strip()
            elif key_val[0].strip() != 'file':
                raw_val = key_val[1].strip()
                val: Union[int, float, str]
                try:
                    val = int(raw_val)
                except ValueError:
                    try:
                        val = float(raw_val)
                    except ValueError:
                        val = raw_val
                config_dict[key_val[0].strip()] = val
        cur_pos = fd.tell()
        line = fd.readline().strip()
    fd.seek(cur_pos)
    return lineno
Example #5
def scan_draws(fp: TextIO, config_dict: Dict, lineno: int) -> int:
    """
    Parse draws, check elements per draw, save num draws to config_dict.
    """
    draws_found = 0
    num_cols = len(config_dict['column_names'])
    cur_pos = fp.tell()
    line = fp.readline().strip()
    first_draw = None
    while len(line) > 0 and not line.startswith('#'):
        lineno += 1
        draws_found += 1
        data = line.split(',')
        if len(data) != num_cols:
            raise ValueError(
                'line {}: bad draw, expecting {} items, found {}'.format(
                    lineno, num_cols, len(data)))
        if first_draw is None:
            first_draw = np.array(data, dtype=np.float64)
        cur_pos = fp.tell()
        line = fp.readline().strip()
    config_dict['draws'] = draws_found
    config_dict['first_draw'] = first_draw
    fp.seek(cur_pos)
    return lineno
Example #6
def _last(_f: TextIO, _l: int):
    # `_buffer` is assumed to be a module-level chunk size in bytes.
    offset = _buffer
    while True:
        try:
            _f.seek(-offset, os.SEEK_END)
            at_start = False
        except IOError:
            # The file is shorter than the offset: read it from the top.
            _f.seek(0)
            at_start = True
        found = _f.readlines()
        if len(found) >= _l or at_start:
            return found[-_l:], _f.tell()
        offset += _buffer
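A usage sketch, assuming _buffer is a module-level chunk size (it is not defined in the excerpt). In text mode, CPython rejects nonzero end-relative seeks with io.UnsupportedOperation, an OSError subclass, so the except branch kicks in and the whole file is read, which is still correct, just slower:

import os

_buffer = 4096  # assumed module-level chunk size

with open("app.log") as f:        # hypothetical log file
    tail, end_pos = _last(f, 10)  # last ten lines plus the final offset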
Example #7
def _read_file_chunk(file: TextIO, chunksize: int) -> str:
    """ Reads a chunk starting from `chunksize` before file pointer and up to current file pointer
    If `chunksize` is larger than the current file pointer, the file is read from the beginning
    Returns the read content in reverse order and moves the file pointer to where the content starts
    Reverse order is used, as it will be mostly faster to search for newlines,
    especially if there are many lines in a given chunk """
    mov = file.tell() - max(file.tell() - chunksize, 0)
    file.seek(file.tell() - mov)
    reversed_content = file.read(mov)[::-1]
    file.seek(file.tell() - mov)
    return reversed_content
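A hedged sketch of how the reversed chunks can be consumed, here recovering the text after the last newline; tail_line is a hypothetical helper, not part of the original module:

import os

def tail_line(file, chunksize: int = 1024) -> str:
    """Return everything after the last newline, scanning from the end."""
    file.seek(0, os.SEEK_END)
    collected = ""
    first = True
    while True:
        rev = _read_file_chunk(file, chunksize)
        if not rev:                        # start of file: one big line
            return collected
        if first and rev.startswith("\n"):
            rev = rev[1:]                  # ignore the file's trailing newline
        first = False
        idx = rev.find("\n")
        if idx != -1:                      # the last newline is inside this chunk
            return rev[:idx][::-1] + collected
        collected = rev[::-1] + collected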
Example #8
def scan_warmup(fp: TextIO, config_dict: Dict, lineno: int) -> int:
    """
    Check warmup iterations, if any.
    """
    if 'save_warmup' not in config_dict:
        return lineno
    cur_pos = fp.tell()
    line = fp.readline().strip()
    while len(line) > 0 and not line.startswith('#'):
        lineno += 1
        cur_pos = fp.tell()
        line = fp.readline().strip()
    fp.seek(cur_pos)
    return lineno
Example #9
    def _read_game(self, infile: typing.TextIO):
        """Read events and generate data until a new game is declared in the file.

        Args:
            infile: An open buffer reading the event file.

        Yields:
            A row of tabular data for every play in the game.
        """
        while True:
            prev_loc = infile.tell()
            line = infile.readline()
            if not line:
                return
            fields = line.strip().split(",")
            if fields[0] == "id":
                infile.seek(prev_loc)
                return
            if fields[0] in ["start", "sub"]:
                pid: str = fields[1]
                pos: int = int(fields[5])
                lineup = self.h_lineup if int(fields[3]) else self.v_lineup
                lineup[pos - 1] = pid
                continue
            if fields[0] == "play":
                self._process_play(*fields[1:])
                yield self._current_event
                self._current_event = None
Example #10
def parse_conllu_plus_fields(
    in_file: T.TextIO,
    metadata_parsers: T.Optional[T.Dict[str, _MetadataParserType]] = None
) -> T.Optional[T.Sequence[str]]:
    pos = in_file.tell()

    # Get first line
    try:
        first_sentence = next(parse_sentences(in_file))
        first_line = first_sentence.split("\n")[0]
    except StopIteration:
        first_line = ""

    # parse_sentences moves the file cursor, so reset it here
    in_file.seek(pos)

    if not first_line.startswith("#"):
        return None

    tokenlist = parse_token_and_metadata(first_line,
                                         metadata_parsers=metadata_parsers)
    metadata = tokenlist.metadata

    fields = None
    if "global.columns" in metadata and metadata["global.columns"]:
        fields = [
            value.lower() for value in metadata["global.columns"].split(" ")
        ]

    return fields
Example #11
def get_mzXMLs_from_pep_xml(pepxml_file: typing.TextIO):
    REC = re.compile(' base_name="(.+?)"')
    currpos = pepxml_file.tell()
    t = pepxml_file.read()
    pepxml_file.seek(currpos)
    paths = map(pathlib.Path, REC.findall(t))
    return [path.with_suffix(".mzXML") for path in paths if path.is_absolute()]
Example #12
    def from_file(cls, fp: TextIO, skip_line=2):
        header = fp.readline()
        if not header:  # readline() returns '' at EOF, never None
            raise RuntimeError('No more lines exist in {}'.format(fp))
        for i in range(skip_line):
            fp.readline()

        size = int(fp.readline())  # eval() is unsafe; the line holds an integer
        data = []

        for i in range(size):
            now_position = fp.tell()
            line = fp.readline()
            try:
                x, y = map(float, line.split())
                data.append([x, y])
            except ValueError:
                logger.warning(
                    f"Expected {size} points in {fp} but only got {i}"
                )
                # The data block is shorter than self._size
                fp.seek(now_position)  # go back to the previous line
                size = i  # update the point count
                break

        return SinglePoint(size, data)
Example #13
    def _new_game(self, game_id: str, infile: typing.TextIO):
        """Consumes lines describing game metadata from the event file.

        Args:
            game_id: Identifier for the game.
            infile: Open buffer reading the event file.

        Returns:
            None.
        """
        self.current_game = {"id": game_id}
        while True:
            prev_loc = infile.tell()
            line = infile.readline()
            if not line:
                raise Exception("Encountered EOF while parsing new game info")
            fields = line.strip().split(",")
            if fields[0] not in ("version", "info"):
                infile.seek(prev_loc)
                return
            if fields[0] == "info":
                field, value = fields[1:]
                self.current_game[field] = value
                if field == "visteam":
                    self.v_roster = data.get_roster(self.year, value)
                elif field == "hometeam":
                    self.h_roster = data.get_roster(self.year, value)
Example #14
def scan_warmup_iters(fd: TextIO, config_dict: Dict, lineno: int) -> int:
    """
    Check warmup iterations, if any.
    """
    if 'save_warmup' not in config_dict:
        return lineno
    cur_pos = fd.tell()
    line = fd.readline().strip()
    draws_found = 0
    while len(line) > 0 and not line.startswith('#'):
        lineno += 1
        draws_found += 1
        cur_pos = fd.tell()
        line = fd.readline().strip()
    fd.seek(cur_pos)
    config_dict['draws_warmup'] = draws_found
    return lineno
Example #15
    def dump_mordict(self,
                     buffer: typing.TextIO,
                     with_comments: bool = True) -> None:
        if self.enabled:
            self.phon.dump_mordict(buffer, with_comments)
            buffer.write("\t")
            self.cat.dump_mordict(buffer, with_comments)

            if self.sem.value:
                buffer.write(" ")
            # === END IF ===
            self.sem.dump_mordict(buffer, with_comments)

            if self.gloss.value:
                buffer.write(" ")
            # === END IF ===
            self.gloss.dump_mordict(buffer, with_comments)
        else:
            # discard contents and just dump comments
            disabled_entry = io.StringIO()  # type: io.StringIO

            self.phon.dump_mordict(disabled_entry, with_comments)
            buffer.write("\t")
            self.cat.dump_mordict(disabled_entry, with_comments)

            if self.sem.value:
                buffer.write(" ")
            # === END IF ===
            self.sem.dump_mordict(disabled_entry, with_comments)

            if self.gloss.value:
                buffer.write(" ")
            # === END IF ===
            self.gloss.dump_mordict(disabled_entry, with_comments)

            disabled_entry_res = disabled_entry.getvalue().replace(
                "\r\n", " ").replace("\n", " ").replace("\r", " ")  # type: str

            buffer.writelines((
                "% DISABLED: ",
                disabled_entry_res,
            ))
        # === END IF ===

        if with_comments:
            for c in self.comments:
                c.dump_mordict(buffer, with_comments=True)
            # === END FOR c ===
        # === END IF ===

        if buffer.seekable():
            buffer.seek(buffer.tell() - 1)
            last_char = buffer.read()  # type: str
            if last_char not in "\n\r":
                buffer.write("\n")
            # === END IF ===
        else:
            buffer.write("\n")
Example #16
def run_csv2rdf(csv_filename: str,
                metadata_filename: str,
                csv_io: TextIO,
                metadata_io: TextIO,
                codelists_base: Optional[str] = None):
    client = docker.from_env()
    csv2rdf = client.containers.create(
        'gsscogs/csv2rdf',
        command=(f'csv2rdf -m annotated -o /tmp/output.ttl '
                 f'-t /tmp/{csv_filename} -u /tmp/{metadata_filename}')
    )
    archive = BytesIO()
    metadata_io.seek(0, SEEK_END)
    metadata_size = metadata_io.tell()
    metadata_io.seek(0)
    csv_io.seek(0, SEEK_END)
    csv_size = csv_io.tell()
    csv_io.seek(0)
    with TarFile(fileobj=archive, mode='w') as t:
        tis = TarInfo(str(metadata_filename))
        tis.size = metadata_size
        tis.mtime = time.time()
        t.addfile(tis, BytesIO(metadata_io.read().encode('utf-8')))
        tic = TarInfo(str(csv_filename))
        tic.size = csv_size
        tic.mtime = time.time()
        t.addfile(tic, BytesIO(csv_io.read().encode('utf-8')))
        if codelists_base is not None:
            t.add(Path('features') / 'fixtures' / codelists_base,
                  arcname=codelists_base)

    archive.seek(0)
    csv2rdf.put_archive('/tmp/', archive)
    csv2rdf.start()
    response = csv2rdf.wait()
    sys.stdout.write(csv2rdf.logs().decode('utf-8'))
    assert_equal(response['StatusCode'], 0)
    output_stream, output_stat = csv2rdf.get_archive('/tmp/output.ttl')
    output_archive = BytesIO()
    for line in output_stream:
        output_archive.write(line)
    output_archive.seek(0)
    with TarFile(fileobj=output_archive, mode='r') as t:
        output_ttl = t.extractfile('output.ttl')
        return output_ttl.read()
Example #17
def sniff_reader(file: typing.TextIO, num: int = 40) -> ReadFuncTy:
    pos = file.tell()
    header = file.readline()
    sample = header + "".join(itertools.islice(file, num - 1))  # read first N lines to sniff
    ret = None
    try:
        sniff_json(sample)  # raises ValueError if the sample isn't JSON
    except ValueError:
        try:
            dialect = csv.Sniffer().sniff(sample, (";", ","))
        except csv.Error:
            try:
                linereader = sniff_xmage(sample.splitlines())
            except ValueError:
                linereader = sniff_plain(sample.splitlines())
                mylogger.MAINLOGGER.info("Plain text guessed")
                ret = lambda fp: read_txt(fp, line_reader=linereader)
            else:
                mylogger.MAINLOGGER.info("Xmage save file guessed")
                ret = lambda fp: read_xmage_deck(fp, line_reader=linereader)
        else:
            mylogger.MAINLOGGER.info("CSV input guessed")
            v = csv.reader([header], dialect=dialect)
            line = list(map(str.lower, next(v)))
            count_column = line.index("count")
            name_column = line.index("name")
            try:
                section_column = line.index("section")
            except ValueError:
                section_column = None
            try:
                version_column = line.index("edition")
            except ValueError:
                version_column = None
            try:
                collectors_num_column = line.index("card number")
            except ValueError:
                collectors_num_column = None
            try:
                language_column = line.index("language")
            except ValueError:
                language_column = None

            ret = lambda file: read_csv(file,
                                        name_column=name_column,
                                        count_column=count_column,
                                        version_column=version_column,
                                        section_column=section_column,
                                        collectors_num_column=collectors_num_column,
                                        language_column=language_column,
                                        dialect=dialect)
    else:
        mylogger.MAINLOGGER.info("JSON file guessed")
        ret = lambda fp: read_json(fp)
    file.seek(pos)
    return ret
Example #18
    def read_params_from_txt(cls, file: TextIO):
        start_pos = file.tell()
        header = file.readline()
        if not header.startswith(cls.header_n_params_key):
            warnings.warn('File has no header parameters')
            file.seek(start_pos)
            return

        json_length = int(header.split()[1]) + 1  # 1 for last '\n'
        params = json.loads(file.read(json_length), cls=ReprJSONDecoder)
        return params
Example #19
def should_rotate(message: loguru.Message, file: typing.TextIO) -> bool:
    """When should the bot rotate : Once in 1 week or if the size is greater than 5 MB."""
    filepath = os.path.abspath(file.name)
    creation = os.path.getmtime(filepath)
    now = message.record["time"].timestamp()
    max_time = 7 * 24 * 60 * 60  # 1 week in seconds
    if file.tell() + len(message) > 5 * (2**20):  # if greater than size 5 MB
        return True
    if now - creation > max_time:
        return True
    return False
Example #20
    def write_to_file(self, file_stream: TextIO, cards: Iterator[Card]):
        writer = csv.writer(file_stream, delimiter=",")
        if file_stream.tell() == 0:
            writer.writerow(
                ["uid", "level", "vip_type", "vip_status", "timestamp"])
        for card in cards:
            user = card.user
            writer.writerow([
                user.uid, user.level, user.vip_type, user.vip_status,
                card.timestamp
            ])
Example #21
def detect_eval(fp: TextIO) -> Callable[[str], TParseRet]:
    pos = fp.tell()
    line = fp.readline()
    fp.seek(pos)

    if "rbp_eval" in line:
        parse_func = rbp_parse
    elif "runid" in line:
        parse_func = gdeval_parse
    else:
        parse_func = trec_parse
    return parse_func
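A usage sketch (hypothetical file name; rbp_parse, gdeval_parse, and trec_parse are assumed to come from the same module):

with open("run.eval") as fp:
    parse = detect_eval(fp)   # the file position is unchanged afterwards
    result = parse(fp.read())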
Example #22
def scan_sampling_iters(fd: TextIO, config_dict: Dict, lineno: int) -> int:
    """
    Parse sampling iterations and save the iteration count to config_dict.
    """
    draws_found = 0
    num_cols = len(config_dict['column_names'])
    cur_pos = fd.tell()
    line = fd.readline().strip()
    while len(line) > 0 and not line.startswith('#'):
        lineno += 1
        draws_found += 1
        data = line.split(',')
        if len(data) != num_cols:
            raise ValueError(
                'line {}: bad draw, expecting {} items, found {}'.format(
                    lineno, num_cols, len(data)))
        cur_pos = fd.tell()
        line = fd.readline().strip()
    config_dict['draws_sampling'] = draws_found
    fd.seek(cur_pos)
    return lineno
Example #23
def scan_config(fp: TextIO, config_dict: Dict, lineno: int) -> int:
    """
    Scan the initial stan_csv comment lines and
    save non-default configuration information to config_dict.
    """
    cur_pos = fp.tell()
    line = fp.readline().strip()
    while len(line) > 0 and line.startswith('#'):
        lineno += 1
        if not line.endswith('(Default)'):
            line = line.lstrip(' #\t')
            key_val = line.split('=')
            if len(key_val) == 2:
                key = key_val[0].strip()
                if key == 'file' and not key_val[1].endswith('csv'):
                    config_dict['data_file'] = key_val[1].strip()
                elif key != 'file':
                    config_dict[key] = key_val[1].strip()
        cur_pos = fp.tell()
        line = fp.readline().strip()
    fp.seek(cur_pos)
    return lineno
Example #24
def _retrieve_last_line_of_file(f: typing.TextIO, read_chunk_size: int = 100) -> str:
    """ Retrieve the last line of the file.

    From: https://stackoverflow.com/a/7167316/12907985

    Args:
        f: File-like object.
        read_chunk_size: Size of step in bytes to read backwards into the file. Default: 100.

    Returns:
        Last line of file, assuming it's found.
    """
    last_line = ""
    while True:
        # We grab chunks from the end of the file towards the beginning until we
        # get a new line
        # However, we need to be more careful because seeking directly from the end
        # of a text file is apparently undefined behavior. To address this, we
        # follow the approach from here: https://stackoverflow.com/a/51131242/12907985
        # First, move to the end of the file.
        f.seek(0, os.SEEK_END)
        # Then step back from the current position (based on SEEK_SET and
        # tell()), clamping at the start of the file so a short file doesn't
        # trigger a negative-seek ValueError.
        chunk_start = f.tell() - len(last_line) - read_chunk_size
        to_read = read_chunk_size
        if chunk_start < 0:
            to_read += chunk_start  # shrink the chunk so nothing is re-read
            chunk_start = 0
        f.seek(chunk_start, os.SEEK_SET)
        # NOTE: This chunk isn't necessarily going back read_chunk_size characters, but
        #       rather just some number of bytes. Depending on the encoding, it may be.
        #       In any case, we don't care - we're just looking for the last line.
        chunk = f.read(to_read)

        if not chunk:
            # The whole file is one big line
            return last_line

        if not last_line and chunk.endswith('\n'):
            # Ignore the trailing newline at the end of the file (but include it
            # in the output).
            last_line = '\n'
            chunk = chunk[:-1]

        nl_pos = chunk.rfind('\n')
        # What's being searched for will have to be modified if you are searching
        # files with non-unix line endings.

        last_line = chunk[nl_pos + 1:] + last_line

        if nl_pos == -1:
            # The whole chunk is part of the last line.
            continue

        return last_line
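A quick check with an in-memory buffer, which supports the same seek calls (not part of the original source):

import io

buf = io.StringIO("first\nsecond\nthird\n")
print(repr(_retrieve_last_line_of_file(buf)))  # 'third\n'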
Example #25
def reader(f: TextIO, field_cnt: int) -> CsvReader:
    """ Replaces the native CSV reader since it strips quotes when the field 
    doesn't contain a comma... Disabling quoting breaks when the field does 
    contain a comma but fixes when the sub delimiters are present. 
    
        The header will be skipped if present in the CSV file. Looking for 
    the SEQ header should be sufficient to skip
    """
    inst = _CsvReaderImpl(f, field_cnt)

    pos = f.tell()
    has_header = f.read(4).upper().startswith("SEQ|")
    if has_header:
        f.readline()
    else:
        f.seek(pos)
    return inst
Example #26
def read_plot_data(in_file: TextIO) -> pd.DataFrame:
    """Read an AFL `plot_data` file."""
    def fix_map_size(x):
        if isinstance(x, str):
            return float(x.split('%')[0])
        return x

    # Skip the opening '# ' (if it exists)
    pos = in_file.tell()
    first_chars = in_file.read(2)
    if first_chars != '# ':
        in_file.seek(pos)

    # Read the data
    df = pd.read_csv(in_file, index_col=False, skipinitialspace=True)
    df.map_size = df.map_size.apply(fix_map_size)

    return df
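A toy check with a trimmed header; real AFL plot_data files carry more columns, so this sample is an assumption rather than a real file:

import io

sample = "# unix_time, cycles_done, map_size\n1600000000, 5, 1.23%\n"
df = read_plot_data(io.StringIO(sample))
print(df.map_size[0])  # 1.23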
Example #27
def parse_quant_exprs(
    file: typing.TextIO,
    var_num: int,
) -> typing.Tuple[typing.List[QuantExpr], int]:
    """
    :param file: File handler with cursor at the first quantifier line
    :param var_num: Number of variables
    :return: The parsed quant expressions and the line position of the SAT part
    """

    quants = []
    used_vars = [0 for _ in range(var_num)]  # Enforce invariants

    while True:
        line_pos = file.tell()
        line = file.readline()
        tokens = line.split(' ')

        if tokens[0] != 'a' and tokens[0] != 'e':
            for i in range(var_num):
                if used_vars[i] == 0:
                    raise_qdimacs_exception(
                        'variable {} is unbound'.format(i + 1))
            break

        # In QDIMACS, 'a' opens a universal block and 'e' an existential one
        quantifier = (Quantifier.Forall
                      if tokens[0] == 'a' else Quantifier.Exists)
        int_tokens = [int(tok) for tok in tokens[1:]]

        # Truncate the trailing '0' terminator token
        int_tokens = int_tokens[:-1]

        # Bookkeeping
        for token in int_tokens:
            if used_vars[token - 1] != 0:
                raise Exception(
                    "variable {} has been bound multiple times".format(token))
            used_vars[token - 1] = 1

        quants.append(QuantExpr(quantifier, int_tokens))

    return quants, line_pos
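A usage sketch over an in-memory QDIMACS prefix, assuming the QuantExpr, Quantifier, and raise_qdimacs_exception definitions from the surrounding module:

import io

prefix = "a 1 0\ne 2 3 0\n-1 2 0\n1 -2 3 0\n"
fp = io.StringIO(prefix)
quants, clause_pos = parse_quant_exprs(fp, var_num=3)
fp.seek(clause_pos)  # rewind to the start of the clause section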
Example #28
def deck_load(inp: TextIO, name: str) -> Deck:
    """Parse single deck from a formatted checkpoint in a filelike object."""
    start_pos = inp.tell()
    for line in inp:
        if name in line:
            break
    else:
        inp.seek(start_pos)  # return to start pos
        raise AttributeError(f"{name} could not be found in input string.")

    rest = line + "\n" + inp.read()  # read rest of file
    inp.seek(start_pos)  # return to start_pos

    # parse rest
    group = deck.parseString(rest)[0]
    my_name = "".join(group["key"])
    if my_name.strip() != name.strip():
        raise RuntimeWarning(
            f"expected deck {name}, but got a deck named {my_name}")
    return _to_deck(group)
Example #29
def skip_initial_comment(f_stream: TextIO) -> int:
    """
    Initial comment in ~/.pg_service.conf is not always marked with '#'
    which crashes the parser. This function takes a file object and
    "rewinds" it to the beginning of the first section,
    from where on it can be parsed safely

    :return: number of skipped lines
    """
    section_regex = r"\s*\["
    pos = f_stream.tell()
    lines_skipped = 0
    while True:
        line = f_stream.readline()
        if line == "":
            break
        if re.match(section_regex, line) is not None:
            f_stream.seek(pos)
            break
        else:
            pos += len(line)  # assumes one character per stream position (ASCII, unix newlines)
            lines_skipped += 1
    return lines_skipped
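Because the stream is rewound to the start of the first section, it can be handed straight to configparser. A hedged usage sketch (the path comes from the docstring):

import configparser
import os

with open(os.path.expanduser("~/.pg_service.conf")) as f:
    skip_initial_comment(f)
    parser = configparser.ConfigParser()
    parser.read_file(f)  # parsing starts at the first [section]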
Example #30
def peek(file: TextIO) -> str:
    pos = file.tell()
    line = file.readline()
    file.seek(pos)
    return line
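A quick check of the one-line peek with an in-memory buffer (not from the original source):

import io

f = io.StringIO("header\ndata\n")
assert peek(f) == "header\n"       # look at the next line...
assert f.readline() == "header\n"  # ...without having consumed it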