Example no. 1
0
def _skip(f: TextIO, ln: str,
          Nl_sv: int,
          sv: Optional[Sequence[str]] = None):
    """
    skip ahead to next time step
    """
    if sv is None:
        sv = _getsvind(f, ln)

    # f.seek(len(sv)*Nl_sv*80, 1)  # not usable: io.TextIOWrapper rejects nonzero relative seeks
    for _ in range(len(sv)*Nl_sv):
        f.readline()
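A note on the commented-out seek: text-mode streams only support absolute seeks to offsets previously returned by tell(), so a byte-count relative seek raises an error. A minimal sketch of the failure and of the readline-based workaround, using io.StringIO as a stand-in:

import io

f = io.StringIO('line1\nline2\nline3\n')
try:
    f.seek(80, 1)  # nonzero cur-relative seek is rejected on text streams
except OSError:
    pass
for _ in range(2):  # skip two lines the portable way
    f.readline()
print(f.readline())  # -> 'line3\n'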
Example no. 2
0
def _getsvind(f: TextIO, ln: str) -> List[str]:
    if len(ln) < 32:
        raise ValueError(f'satellite index line truncated: {ln}')

    Nsv = int(ln[29:32])  # Number of visible satellites this time %i3
    # get first 12 SV ID's
    sv = _getSVlist(ln, min(12, Nsv), [])

    # any more SVs?
    n = Nsv-12
    while n > 0:
        sv = _getSVlist(f.readline(), min(12, n), sv)
        n -= 12

    if Nsv != len(sv):
        raise ValueError('satellite list read incorrectly')

    return sv
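The while loop pulls continuation lines in chunks of 12 SV IDs; a worked instance of that arithmetic, for illustration only:

from math import ceil

Nsv = 17                               # hypothetical satellite count
first = min(12, Nsv)                   # IDs on the epoch line itself
extra = max(0, ceil((Nsv - 12) / 12))  # continuation lines the loop reads
assert (first, extra) == (12, 1)       # 12 IDs, then one line with 5 more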
Example no. 3
0
    def format(self, f: TextIO) -> None:
        width: List[int] = []
        columns: List[str] = []
        rows: List[List[str]] = []

        for column in self.row_set.columns():
            name = str(column.name)
            columns.append(name)
            width.append(len(name))

        for row in self.row_set.iter():
            expressions: List[str] = []

            for (column, cell) in zip(self.row_set.columns(), row.data):
                expr: ExpressionLiteral = column.ty.construct(cell)
                sql = expr.to_sql()
                expressions.append(sql)

            rows.append(expressions)

        for row in rows:
            for (i, cell) in enumerate(row):
                width[i] = max(len(cell), width[i])

        f.write(' ')  # shift to the right for the sliding effect
        f.write(
            self.separator.join(c.center(w) for (c, w) in zip(columns, width)))
        f.write(os.linesep)
        f.write(self.separator.join(''.ljust(w, '/') for w in width))
        f.write(os.linesep)

        for row in rows:
            f.write(
                self.separator.join(c.ljust(w) for (c, w) in zip(row, width)))
            f.write(os.linesep)
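The formatter does the usual two-pass column sizing: seed each width from the header, then widen from the rows. A self-contained sketch of the same layout logic, with hypothetical data in place of row_set:

columns = ['id', 'name']
rows = [['1', 'alpha'], ['22', 'b']]
width = [len(c) for c in columns]
for row in rows:
    for i, cell in enumerate(row):
        width[i] = max(len(cell), width[i])

sep = ' | '
print(sep.join(c.center(w) for c, w in zip(columns, width)))
print(sep.join(''.ljust(w, '/') for w in width))
for row in rows:
    print(sep.join(c.ljust(w) for c, w in zip(row, width)))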
Example no. 4
0
def tokenize(input_fd: TextIO) -> Iterator[Tuple[Optional[str], int, int]]:
    """
    Read characters from input file one-by-one and generate stream of tokens with metadata, separated by spaces or ';'
    ';' and \n are replaced by None, meaning end of line
    escaped characters in string literal are unescaped.
    Generate tuples (value:str_or_none, line:int, start_position_in_line:int)
    """
    curr_buff = None  # current token, or None if no active token
    line_num = 0
    line_pos = 0
    in_str = False  # True while parsing a string literal (inside quotes)
    token_start_line = None  # line_num & line_pos at the start of the current token
    token_start_pos = None
    eof_reached = False

    while not eof_reached:
        ch = input_fd.read(1)

        if ch == '':
            if in_str:
                raise ParserError(
                    "Unclosed string at the end of file at line {}".format(
                        line_num))
            ch = '\n'  # hack to flush current buffer
            eof_reached = True

        line_pos += 1

        if in_str:
            if ch == '\\':
                ch = input_fd.read(1)
                if ch == '':
                    return

                line_pos += 1

                if ch not in ESCAPED:
                    raise ParserError(
                        "Failed to parse at line {}".format(line_num))

                curr_buff += ESCAPED[ch]
            elif ch == '"':
                yield curr_buff + '"', token_start_line, token_start_pos
                token_start_pos = None
                token_start_line = None
                curr_buff = None
                in_str = False
            elif ch == '\n':
                raise ParserError(
                    "Failed to parse at line {}".format(line_num))
            else:
                curr_buff += ch
        elif ch == '"':
            in_str = True
            curr_buff = ch
            token_start_pos = line_pos
            token_start_line = line_num
        elif ch.isspace():
            if curr_buff is not None:
                yield curr_buff, token_start_line, token_start_pos
                curr_buff = None
                token_start_pos = None
                token_start_line = None

            if ch == '\n':
                yield None, line_num, line_pos
                line_num += 1
                line_pos = 0

        elif ch == ';':
            yield None, line_num, line_pos
        else:
            if curr_buff is None:
                curr_buff = ch
                token_start_pos = line_pos
                token_start_line = line_num
            else:
                curr_buff += ch
    assert curr_buff is None, "Unflushed token at the end of file"
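A usage sketch driving the generator with an in-memory stream, assuming tokenize(), ParserError, and an ESCAPED mapping with at least 'n' -> newline are importable from the same module:

import io

src = io.StringIO('alpha "a\\nb"; beta\n')
for value, line, pos in tokenize(src):
    print(repr(value), line, pos)
# yields 'alpha', the unescaped string token (quotes retained), None for ';',
# 'beta', and None for each end-of-line, including the EOF flush.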
Example no. 5
0
def obsheader2(f: TextIO,
               useindicators: bool = False,
               meas: Optional[Sequence[str]] = None) -> Dict[str, Any]:
    """
    End users should use rinexheader()
    """
    if isinstance(f, (str, Path)):
        with opener(f, header=True) as h:
            return obsheader2(h, useindicators, meas)

    f.seek(0)
    # %% selection
    if isinstance(meas, str):
        meas = [meas]

    if not meas or not meas[0].strip():
        meas = None

    hdr = rinexinfo(f)
    Nobs = 0  # not None due to type checking

    for ln in f:
        if "END OF HEADER" in ln:
            break

        h = ln[60:80].strip()
        c = ln[:60]
        # %% measurement types
        if "# / TYPES OF OBSERV" in h:
            if Nobs == 0:
                Nobs = int(c[:6])
                hdr[h] = c[6:].split()
            else:
                hdr[h] += c[6:].split()
        elif h not in hdr:  # Header label
            hdr[h] = c  # string with info
        else:  # concatenate
            hdr[h] += " " + c
    # %% useful values
    try:
        hdr["systems"] = hdr["RINEX VERSION / TYPE"][40]
    except KeyError:
        pass

    hdr["Nobs"] = Nobs
    # 5 observations per line (incorporating LLI, SSI)
    hdr["Nl_sv"] = ceil(hdr["Nobs"] / 5)
    # %% list with receiver location in x,y,z cartesian ECEF (OPTIONAL)
    try:
        hdr["position"] = [
            float(j) for j in hdr["APPROX POSITION XYZ"].split()
        ]
        if ecef2geodetic is not None:
            hdr["position_geodetic"] = ecef2geodetic(*hdr["position"])
    except (KeyError, ValueError):
        pass
    # %% observation types
    try:
        hdr["fields"] = hdr["# / TYPES OF OBSERV"]
        if hdr["Nobs"] != len(hdr["fields"]):
            logging.error(
                f"{f.name} number of observations declared in header does not match fields"
            )
            hdr["Nobs"] = len(hdr["fields"])

        if isinstance(meas, (tuple, list, np.ndarray)):
            ind = np.zeros(len(hdr["fields"]), dtype=bool)
            for m in meas:
                for i, field in enumerate(hdr["fields"]):
                    if field.startswith(m):
                        ind[i] = True

            hdr["fields_ind"] = np.nonzero(ind)[0]
        else:
            ind = np.s_[:]
            hdr["fields_ind"] = np.arange(hdr["Nobs"])

        hdr["fields"] = np.array(hdr["fields"])[ind].tolist()
    except KeyError:
        pass

    hdr["Nobsused"] = hdr["Nobs"]
    if useindicators:
        hdr["Nobsused"] *= 3

    # %%
    try:
        hdr["# OF SATELLITES"] = int(hdr["# OF SATELLITES"][:6])
    except (KeyError, ValueError):
        pass
    # %% time
    try:
        hdr["t0"] = _timehdr(hdr["TIME OF FIRST OBS"])
    except (KeyError, ValueError):
        pass

    try:
        hdr["t1"] = _timehdr(hdr["TIME OF LAST OBS"])
    except (KeyError, ValueError):
        pass

    try:  # This key is OPTIONAL
        hdr["interval"] = float(hdr["INTERVAL"][:10])
    except (KeyError, ValueError):
        pass

    try:
        s = " "
        hdr["rxmodel"] = s.join(hdr["REC # / TYPE / VERS"].split()[1:-1])
    except (KeyError, ValueError):
        pass

    return hdr
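Per the docstring, end users go through rinexheader(); a hedged sketch assuming the georinex package layout and a hypothetical file path:

import georinex as gr

hdr = gr.rinexheader('tests/demo.10o')  # hypothetical RINEX 2 OBS file
print(hdr.get('t0'), hdr.get('fields'))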
Example no. 6
0
    def __write_header(self, file_handle: TextIO) -> None:
        file_handle.write("# {}\n".format(datetime.now().isoformat()))
        file_handle.write('# {}\n'.format(self._comment))
        file_handle.write('# {} Hz\n'.format(self._device.freq))
        file_handle.write('# {} V\n'.format(self._device.slvl))
        file_handle.write('# {} Time constant\n'.format(self._device.oflt))
        file_handle.write("# pre resistance {} OHM\n".format(
            self._pre_resistance))
        file_handle.write("# sweep rate {} T/min\n".format(self._sweep_rate))
        file_handle.write(
            "Datetime Field Real Imaginary Amplitude Theta Sensitivity T1 T2 T3\n"
        )
Example no. 7
0
def close_data(file: TextIO):
    file.close()
Example no. 8
0
def obsheader3(f: TextIO,
               use: Optional[Sequence[str]] = None,
               meas: Optional[Sequence[str]] = None) -> Dict[str, Any]:
    """
    get RINEX 3 OBS types, for each system type
    optionally, select system type and/or measurement type to greatly
    speed reading and save memory (RAM, disk)
    """
    if isinstance(f, (str, Path)):
        with opener(f, header=True) as h:
            return obsheader3(h, use, meas)

    fields = {}
    Fmax = 0

    # %% first line
    hdr = rinexinfo(f)

    for ln in f:
        if "END OF HEADER" in ln:
            break

        h = ln[60:80]
        c = ln[:60]
        if 'SYS / # / OBS TYPES' in h:
            k = c[0]
            fields[k] = c[6:60].split()
            N = int(c[3:6])
            # %% maximum number of fields in a file, to allow fast Numpy parse.
            Fmax = max(N, Fmax)

            n = N - 13
            while n > 0:  # Rinex 3.03, pg. A6, A7
                ln = f.readline()
                assert 'SYS / # / OBS TYPES' in ln[60:]
                fields[k] += ln[6:60].split()
                n -= 13

            assert len(fields[k]) == N

            continue

        if h.strip() not in hdr:  # Header label
            hdr[h.strip()] = c  # don't strip for fixed-width parsers
            # string with info
        else:  # concatenate to the existing string
            hdr[h.strip()] += " " + c

    # %% list with x,y,z cartesian (OPTIONAL)
    try:
        hdr['position'] = [
            float(j) for j in hdr['APPROX POSITION XYZ'].split()
        ]
        if ecef2geodetic is not None:
            hdr['position_geodetic'] = ecef2geodetic(*hdr['position'])
    except (KeyError, ValueError):
        pass
    # %% time
    try:
        t0s = hdr['TIME OF FIRST OBS']
        # NOTE: must do second=int(float()) due to non-conforming files
        hdr['t0'] = datetime(year=int(t0s[:6]),
                             month=int(t0s[6:12]),
                             day=int(t0s[12:18]),
                             hour=int(t0s[18:24]),
                             minute=int(t0s[24:30]),
                             second=int(float(t0s[30:36])),
                             microsecond=int(float(t0s[30:43]) % 1 * 1000000))
    except (KeyError, ValueError):
        pass

    try:
        hdr['interval'] = float(hdr['INTERVAL'][:10])
    except (KeyError, ValueError):
        pass
    # %% select specific satellite systems only (optional)
    if use is not None:
        if not set(fields.keys()).intersection(use):
            raise KeyError(f'system type {use} not found in RINEX file')

        fields = {k: fields[k] for k in use if k in fields}

    # perhaps this could be done more efficiently, but it's probably low impact on overall program.
    # simple set and frozenset operations do NOT preserve order, which would completely mess up reading!
    sysind = {}
    if isinstance(meas, (tuple, list, np.ndarray)):
        for sk in fields:  # iterate over each system
            # ind = np.isin(fields[sk], meas)  # boolean vector
            ind = np.zeros(len(fields[sk]), dtype=bool)
            for m in meas:
                for i, field in enumerate(fields[sk]):
                    if field.startswith(m):
                        ind[i] = True

            fields[sk] = np.array(fields[sk])[ind].tolist()
            sysind[sk] = np.empty(Fmax * 3, dtype=bool)  # *3 due to LLI, SSI
            for j, i in enumerate(ind):
                sysind[sk][j * 3:j * 3 + 3] = i
    else:
        sysind = {k: slice(None) for k in fields}

    hdr['fields'] = fields
    hdr['fields_ind'] = sysind
    hdr['Fmax'] = Fmax

    return hdr
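A usage sketch of the selection arguments: restrict to the GPS system and to measurements starting with 'L1' (the file path is hypothetical):

from pathlib import Path

hdr = obsheader3(Path('tests/demo3.rnx'), use=('G',), meas=('L1',))
print(hdr['fields'], hdr['Fmax'])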
Example no. 9
0
    def __write_header(self, file_handle: TextIO) -> None:
        file_handle.write("# {}\n".format(datetime.now().isoformat()))
        file_handle.write('# {}\n'.format(self._comment))
        file_handle.write('# {} V\n'.format(self._voltage))
        file_handle.write('# {} A-max\n'.format(self._current_limit))
        file_handle.write("# sweep rate {} K/min\n".format(self._sweep_rate))
        file_handle.write("Datetime Voltage Current T1 T2 T3\n")
Example no. 10
0
def analyse_od(model: str, dataset: str, split: str, pivot_file: TextIO):
    """
    TODO
    """
    if split == "kh":
        return

    source_dataset = load_dataset(f"{dataset}.txt")

    label_indices = get_label_indices(source_dataset)

    numeric_labels = list(range(len(label_indices)))
    num_labels = len(numeric_labels)

    split_name = split if split != "kh" else f"kh-{model}"  # the "kh" case returned above, so the else branch is unreachable
    split_path = f"{dataset}.strat-0.15.{split_name}.splits"

    holdout_dataset = load_dataset(os.path.join(split_path, "holdout.txt"))

    schedule_dataset = load_dataset(os.path.join(split_path, "schedule.txt"))

    y_true = [label_indices[label] for label in holdout_dataset.values()]

    splitter = TopNSplitter(50)

    iteration = 0
    cumulative_corrections = 0
    _, remaining_dataset = splitter(schedule_dataset)
    while True:
        holdout_predictions_path = os.path.join(
            split_path, f"{model}/{iteration}/predictions")

        if not os.path.exists(holdout_predictions_path):
            break

        holdout_predictions = load_rois_predictions(holdout_predictions_path,
                                                    holdout_dataset,
                                                    num_labels)

        y_score = list(holdout_predictions.values())

        y_score = [
            coerce_incorrect(num_labels, truth, prediction)
            for truth, prediction in zip(y_true, y_score)
        ]

        top_1 = top_k_accuracy_score(y_true,
                                     y_score,
                                     k=1,
                                     labels=numeric_labels,
                                     normalize=True)

        pivot_file.write(",".join(
            map(str, [
                model, dataset, split, iteration, "holdout", "accuracy", top_1
            ])) + "\n")

        update_dataset, remaining_dataset = splitter(remaining_dataset)

        update_predictions_path = os.path.join(
            split_path, f"{model}/{iteration}/update_predictions")

        if os.path.exists(update_predictions_path):
            update_y_true = [
                label_indices[label] for label in update_dataset.values()
            ]
            update_predictions = load_rois_predictions(update_predictions_path,
                                                       update_dataset,
                                                       num_labels)
            update_y_score = list(update_predictions.values())
            update_y_score = [
                coerce_incorrect(num_labels, truth, prediction)
                for truth, prediction in zip(update_y_true, update_y_score)
            ]
            update_top_1 = top_k_accuracy_score(update_y_true,
                                                update_y_score,
                                                k=1,
                                                labels=numeric_labels,
                                                normalize=True)
            pivot_file.write(",".join(
                map(str, [
                    model, dataset, split, iteration, "update", "accuracy",
                    update_top_1
                ])) + "\n")
            cumulative_corrections += int((1 - update_top_1) * 50)
            pivot_file.write(",".join(
                map(str, [
                    model, dataset, split, iteration, "update",
                    "cumulative_corrections", cumulative_corrections
                ])) + "\n")

        iteration += 1
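The corrections counter converts batch accuracy into an error count; it assumes each update batch holds the 50 items yielded by TopNSplitter(50). A worked instance of the arithmetic (value chosen to be exact in binary):

update_top_1 = 0.875   # hypothetical top-1 accuracy on one update batch
batch_size = 50        # matches TopNSplitter(50) above
corrections = int((1 - update_top_1) * batch_size)  # int() truncates
assert corrections == 6   # 0.125 * 50 == 6.25 -> 6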
Example no. 11
0
def hash_files(docs: Iterable[DocFile], out: TextIO):

    out.write('{\n')
    out.write('"generator": "stramp",\n')
    out.write('"documents": [\n')

    first = True
    for doc in docs:

        if doc.file_format == 'org':
            from stramp.parsers.org_parser import load_file as load_org_file
            load_org_file(doc)
        elif doc.file_format in ('commonmark', 'markdown'):
            from stramp.parsers.markdown_parser import load_file as load_markdown_file
            load_markdown_file(doc)
        else:
            raise ValueError(f'Unsupported file format {doc.file_format!r}')

        with io.StringIO() as file_json:  # type: Union[TextIO, io.StringIO]

            write_file_hash_json(doc, file_json)

            if not first:
                out.write(',\n')
            first = False
            out.write(file_json.getvalue())

    out.write(']}\n')
Example no. 12
0
def write_file_hash_json(doc: DocFile,
                         out: TextIO,
                         hash_algorithm: str = 'sha256'):

    assert doc.root_heading is not None

    epoch = datetime.datetime(1970, 1, 1)
    mtime_dt = epoch + datetime.timedelta(
        microseconds=doc.file_stat.st_mtime_ns // 1000)
    mtime_str = mtime_dt.isoformat(timespec='microseconds') + 'Z'

    metadata = {
        'file_path': str(doc.file_path),
        'file_date': mtime_str,
        'read_datetime':
            doc.file_read_datetime.isoformat(timespec='microseconds') + 'Z',
        'hash_algorithm': hash_algorithm,
    }

    out.write('{\n')
    for k, v in metadata.items():
        out.write(f'{json.dumps(k)}: {json.dumps(v)},\n')

    out.write('"range_hashes": [\n')

    first = True

    for h in PreOrderIter(doc.root_heading):

        if not first:
            out.write(',\n')
        first = False

        record = [
            h.start_offset,
            h.end_offset,
            hashlib.new(
                hash_algorithm,
                doc.file_bytes[h.start_offset:h.end_offset]).hexdigest(),
            # h.text,
        ]
        out.write(json.dumps(record))

    out.write(']}\n')
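Each record hashes one byte range of the source file; a minimal standalone sketch of that hashing step, with hypothetical offsets:

import hashlib

file_bytes = b'# Heading\nbody text\n'
start, end = 0, 10  # hypothetical heading offsets
digest = hashlib.new('sha256', file_bytes[start:end]).hexdigest()
print([start, end, digest])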
Example no. 13
0
def set_pos_after_license(file: TextIO) -> None:
    for line in file:
        if re.search(r"SPDX-License-Identifier", line):
            return
    file.seek(0, 0)
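A usage sketch with an in-memory stream: the cursor ends up just past the SPDX line when one exists, else back at offset 0:

import io

f = io.StringIO('# SPDX-License-Identifier: MIT\ncontent\n')
set_pos_after_license(f)
print(f.read())  # -> 'content\n'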
Example no. 14
0
def _write_continuous_tensor_maps(f: TextIO, db_client: DatabaseClient):
    # Handle special coding values in continuous variables in order to generate summary statistics (mean and std dev) for
    # each field across all samples. This will remove missing samples from the calculation and change the value of 'Less than one'
    query = f"""
    WITH coding_tmp AS (
        SELECT
            *,
            CASE
                WHEN meaning IN ('Do not know',  'Prefer not to answer', 'Ongoing when data entered') OR meaning LIKE "Still taking%" THEN TRUE
            END AS missing,
            CASE
                WHEN meaning IN {LESS_THAN_CODES} THEN '.5'
            END AS value
        FROM
            {CODING_TABLE}
    ), pheno_tmp AS (
    SELECT
        sample_id,
        FieldID,
        instance,
        array_idx,
        COALESCE(c.value, p.value) new_value,
        COALESCE(c.missing, FALSE) missing
    FROM {PHENOTYPE_TABLE} AS p
    LEFT JOIN coding_tmp AS c
        ON TRUE
        AND SAFE_CAST(p.value AS FLOAT64) = SAFE_CAST(c.coding AS FLOAT64)
        AND p.coding_file_id = c.coding_file_id
    )

    SELECT
        t.FieldID,
        Field,
        t.instance,
        AVG(CAST(new_value AS FLOAT64)) mean,
        STDDEV(CAST(new_value AS FLOAT64)) std,
        MAX(array_idx) AS max_array
    FROM pheno_tmp AS t
    LEFT JOIN {DICTIONARY_TABLE} AS d ON d.FieldID = t.FieldID
    WHERE TRUE
        AND ValueType IN ('Integer', 'Continuous')
        AND NOT missing
    GROUP BY t.FieldID, t.instance, Field ORDER BY t.FieldID
    """

    field_data_for_tensor_maps = db_client.execute(query)

    f.write(f"\n\n#  Continuous tensor maps\n")
    for row in field_data_for_tensor_maps:
        name = dataset_name_from_meaning(
            None, [str(row.FieldID), row.Field,
                   str(row.instance)])
        channel_map = "channel_map={"
        for i in range(0, row.max_array + 1):
            channel_map += f"'{name}{JOIN_CHAR}{i}': {i}, "
        channel_map += "}"
        # NOTE: the f-string below reuses the leaked loop variable i
        # (its final value, row.max_array).
        f.write(
            f"ukb_{row.FieldID}_{row.instance} = TensorMap('{name}{JOIN_CHAR}{i}', loss='logcosh', path_prefix='continuous', "
        )
        f.write(
            f"normalization={{'mean': {row.mean}, 'std': {row.std}}}, annotation_units={row.max_array+1}, {channel_map})\n"
        )
Example no. 15
0
    def to_gct(self, f: TextIO, tabular_writer='to_txt'):
        f.write('#1.2\n')
        expression_data = self.joined
        assert expression_data.notnull().all().all()
        f.write(f'{len(expression_data)}\t{len(expression_data.columns)}\n')
        getattr(self, tabular_writer)(f, expression_data)
Example no. 16
0
def write_look_ml(f: TextIO, info: TableInfo):
    # write view
    f.write(f'view: {info.dataset_id}__{info.clear_name} {{\n')
    f.write(
        f'  sql_table_name: `{info.project_id}.{info.dataset_id}.{info.clear_name}'
    )
    if info.is_sharding():
        f.write('_*')
    f.write('`\n    ;;\n\n')

    for field in info.schema:
        write_field(f, field)

    # measure count
    f.write('  measure: count {\n')
    f.write('    type: count\n')

    drill_fields: List[str] = []
    if 'id' in [field.name for field in info.schema]:
        drill_fields.append('id')
    if 'name' in [field.name for field in info.schema]:
        drill_fields.append('name')

    f.write(f'    drill_fields: [{", ".join(drill_fields)}]\n')
    f.write('  }\n')
    f.write('}\n\n')

    for field in filter(lambda x: x.field_type == 'RECORD', info.schema):
        write_record_child(f,
                           field,
                           prefix=f'{info.dataset_id}__{info.clear_name}__')
Example no. 17
0
def write_field(f: TextIO, field: bigquery.SchemaField):
    if field.field_type in ['TIME', 'TIMESTAMP', 'DATE', 'DATETIME']:
        f.write(f'  dimension_group: {field.name} {{\n')
    else:
        f.write(f'  dimension: {field.name} {{\n')
        if field.name == 'id':
            f.write('    primary_key: yes\n')

    if field.field_type in ['INTEGER', 'FLOAT', 'NUMERIC']:
        f.write('    type: number\n')
    elif field.field_type == 'BOOLEAN':
        f.write('    type: yesno\n')
    elif field.field_type in ['TIME', 'TIMESTAMP', 'DATE', 'DATETIME']:
        f.write('    type: time\n')
        f.write('    timeframes: [\n')
        f.write('      raw,\n')
        if field.field_type != 'DATE':
            f.write('      time,\n')
        f.write('      date,\n')
        f.write('      week,\n')
        f.write('      month,\n')
        f.write('      quarter,\n')
        f.write('      year\n')
        f.write('    ]\n')
        if field.field_type == 'DATE':
            f.write('    convert_tz: no\n')
            f.write('    datatype: date\n')
    elif field.field_type == 'RECORD':
        f.write('    hidden: yes\n')
    else:
        f.write('    type: string\n')

    f.write(f'    sql: ${{TABLE}}.{field.name} ;;\n')
    f.write('  }\n\n')
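A sketch rendering one column with the real google.cloud.bigquery SchemaField constructor; output lands in an in-memory buffer:

import io
from google.cloud import bigquery

buf = io.StringIO()
write_field(buf, bigquery.SchemaField('created_at', 'TIMESTAMP'))
print(buf.getvalue())  # a dimension_group of type: time with timeframes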
Example no. 18
0
def del_with_p(data, file: TextIO):
    line = "".join(data[0].xpath(".//text()")).replace("\n", "") + "\n"
    file.write(line)
Example no. 19
0
    def write_svg(self, f: TextIO):
        f.write(f'<{self.diagram_item.name}')
        for name, value in sorted(self.diagram_item.attrs.items()):
            f.write(f' {name}="{e(value)}"')
        f.write(f' data-dbg-cls="{self.diagram_item.__class__.__name__}"'
                f' data-dbg-w="{self.diagram_item.width}"')
        f.write('>')
        for child in self.children:
            if isinstance(child, FormattedItem):
                child.write_svg(f)
            else:
                f.write(e(child))
        f.write(f'</{self.diagram_item.name}>')
Example no. 20
0
    def __write_header(self, file_handle: TextIO) -> None:
        """Write a file header for present settings.

        Arguments:
            file_handle: The open file to write to
        """
        file_handle.write("# {0}\n".format(datetime.now().isoformat()))
        file_handle.write('# {}\n'.format(self._comment))
        file_handle.write("# maximum voltage {0} V\n".format(
            self._max_voltage))
        file_handle.write("# current limit {0} A\n".format(
            self._current_limit))
        file_handle.write('# nplc {}\n'.format(self._nplc))
        file_handle.write("Datetime Voltage Current\n")
Example no. 21
0
def obsheader2(f: TextIO,
               useindicators: bool = False,
               meas: Optional[Sequence[str]] = None) -> Dict[str, Any]:
    """
    End users should use rinexheader()
    """
    if isinstance(f, (str, Path)):
        with opener(f, header=True) as h:
            return obsheader2(h, useindicators, meas)

    f.seek(0)
# %% selection
    if isinstance(meas, str):
        meas = [meas]

    if not meas or not meas[0].strip():
        meas = None

    hdr = rinexinfo(f)
    Nobs = 0  # not None due to type checking

    for ln in f:
        if "END OF HEADER" in ln:
            break

        h = ln[60:80].strip()
        c = ln[:60]
# %% measurement types
        if '# / TYPES OF OBSERV' in h:
            if Nobs == 0:
                Nobs = int(c[:6])
                hdr[h] = c[6:].split()
            else:
                hdr[h] += c[6:].split()
        elif h not in hdr:  # Header label
            hdr[h] = c  # string with info
        else:  # concatenate
            hdr[h] += " " + c
# %% useful values
    try:
        hdr['systems'] = hdr['RINEX VERSION / TYPE'][40]
    except KeyError:
        pass

    hdr['Nobs'] = Nobs
    # 5 observations per line (incorporating LLI, SSI)
    hdr['Nl_sv'] = ceil(hdr['Nobs'] / 5)
# %% list with receiver location in x,y,z cartesian ECEF (OPTIONAL)
    try:
        hdr['position'] = [float(j) for j in hdr['APPROX POSITION XYZ'].split()]
        if ecef2geodetic is not None:
            hdr['position_geodetic'] = ecef2geodetic(*hdr['position'])
    except (KeyError, ValueError):
        pass
# %% observation types
    try:
        hdr['fields'] = hdr['# / TYPES OF OBSERV']
        if Nobs != len(hdr['fields']):
            raise ValueError(f'{f.name} header read incorrectly')

        if isinstance(meas, (tuple, list, np.ndarray)):
            ind = np.zeros(len(hdr['fields']), dtype=bool)
            for m in meas:
                for i, field in enumerate(hdr['fields']):
                    if field.startswith(m):
                        ind[i] = True

            hdr['fields_ind'] = np.nonzero(ind)[0]
        else:
            ind = slice(None)
            hdr['fields_ind'] = np.arange(Nobs)

        hdr['fields'] = np.array(hdr['fields'])[ind].tolist()
    except KeyError:
        pass

    hdr['Nobsused'] = hdr['Nobs']
    if useindicators:
        hdr['Nobsused'] *= 3

# %%
    try:
        hdr['# OF SATELLITES'] = int(hdr['# OF SATELLITES'][:6])
    except (KeyError, ValueError):
        pass
# %% time
    try:
        hdr['t0'] = _timehdr(hdr['TIME OF FIRST OBS'])
    except (KeyError, ValueError):
        pass

    try:
        hdr['t1'] = _timehdr(hdr['TIME OF LAST OBS'])
    except (KeyError, ValueError):
        pass

    try:  # This key is OPTIONAL
        hdr['interval'] = float(hdr['INTERVAL'][:10])
    except (KeyError, ValueError):
        pass

    return hdr
Example no. 22
0
def obsheader2(f: TextIO,
               useindicators: bool = False,
               meas: Optional[Sequence[str]] = None) -> Dict[str, Any]:

    if isinstance(f, Path):
        fn = f
        with opener(fn, header=True) as f:
            return obsheader2(f, useindicators, meas)

# %% selection
    if isinstance(meas, str):
        meas = [meas]

    if not meas or not meas[0].strip():
        meas = None

    hdr: Dict[str, Any] = {}
    Nobs = 0  # not None due to type checking

    for ln in f:
        if "END OF HEADER" in ln:
            break

        h = ln[60:80].strip()
        c = ln[:60]
        # %% measurement types
        if '# / TYPES OF OBSERV' in h:
            if Nobs == 0:
                Nobs = int(c[:6])

            c = c[6:].split()  # NOT within "if Nobs"
# %%
        if h not in hdr:  # Header label
            hdr[h] = c  # string with info
        else:  # concatenate
            if isinstance(hdr[h], str):
                hdr[h] += " " + c
            elif isinstance(hdr[h], list):
                hdr[h] += c
            else:
                raise ValueError(f'not sure what {c} is')
# %% useful values
    hdr['version'] = float(hdr['RINEX VERSION / TYPE'][:9])  # %9.2f
    hdr['systems'] = hdr['RINEX VERSION / TYPE'][40]
    hdr['Nobs'] = Nobs
    # 5 observations per line (incorporating LLI, SSI)
    hdr['Nl_sv'] = ceil(hdr['Nobs'] / 5)

    # %% list with receiver location in x,y,z cartesian ECEF (OPTIONAL)
    try:
        hdr['position'] = [
            float(j) for j in hdr['APPROX POSITION XYZ'].split()
        ]
        if ecef2geodetic is not None:
            hdr['position_geodetic'] = ecef2geodetic(*hdr['position'])
    except KeyError:
        pass
# %% observation types
    hdr['fields'] = hdr['# / TYPES OF OBSERV']
    if Nobs != len(hdr['fields']):
        raise ValueError(f'{f.name} header read incorrectly')

    if isinstance(meas, (tuple, list, np.ndarray)):
        ind = np.zeros(len(hdr['fields']), dtype=bool)
        for m in meas:
            for i, field in enumerate(hdr['fields']):
                if field.startswith(m):
                    ind[i] = True

        hdr['fields_ind'] = np.nonzero(ind)[0]
    else:
        ind = slice(None)
        hdr['fields_ind'] = np.arange(Nobs)

    hdr['fields'] = np.array(hdr['fields'])[ind].tolist()

    hdr['Nobsused'] = hdr['Nobs']
    if useindicators:
        hdr['Nobsused'] *= 3

# %%
    if '# OF SATELLITES' in hdr:
        hdr['# OF SATELLITES'] = int(hdr['# OF SATELLITES'][:6])

# %% time
    hdr['t0'] = _timehdr(hdr['TIME OF FIRST OBS'])

    try:
        hdr['t1'] = _timehdr(hdr['TIME OF LAST OBS'])
    except KeyError:
        pass

    try:  # This key is OPTIONAL
        hdr['interval'] = float(hdr['INTERVAL'][:10])
    except (KeyError, ValueError):
        hdr['interval'] = np.nan  # do NOT set it to None or it breaks NetCDF writing

    return hdr
Example no. 23
0
    def save_history_for_conversation(self, f: TextIO, id: int = 0):

        self.update_status_history_for_conversation(id)
        dump(self.status_per_conversation[id], f)
        f.write('\n')