def parse_pharmgkb_line(self, this_line: str, header_items) -> dict:
        """Turn one tab-separated line of relationships.tsv into a dict.

        Leading/trailing whitespace is removed from the line, the remainder is
        split on tab characters, and the resulting fields are handed to
        ``data_to_dict`` together with the header.

        :param this_line: raw line read from relationships.tsv
        :param header_items: column names from the relationships.tsv header
        :return: whatever ``data_to_dict`` builds from the header and the
            fields (presumably header name -> field value; confirm against
            ``data_to_dict``)
        """
        stripped_line = this_line.strip()
        fields = stripped_line.split('\t')
        record = data_to_dict(header_items, fields)
        return record
Example #2
0
def parse_drug_central_line(this_line: str, header_items: List) -> Dict:
    """Process one tab-separated line of Drug Central text.

    The line is stripped of surrounding whitespace and split on tabs; every
    double-quote character is then removed from each field before the fields
    are combined with the header via ``data_to_dict``.

    Args:
        this_line: A string containing a line of text.
        header_items: A list of header items.

    Returns:
        The result of ``data_to_dict`` for the header items and the cleaned
        fields (presumably a header -> value mapping; confirm against
        ``data_to_dict``).
    """
    cleaned_fields = []
    for raw_field in this_line.strip().split("\t"):
        # Quotes are dropped everywhere in the field, not only at its ends.
        cleaned_fields.append(raw_field.replace('"', ''))

    return data_to_dict(header_items, cleaned_fields)
    def make_id_mapping_file(self,
                             map_file: str,
                             sep: str = '\t',
                             pharmgkb_id_col: str = 'PharmGKB Accession Id',
                             id_key: str = 'Cross-references',
                             id_sep: str = ',',
                             id_key_val_sep: str = ':') -> dict:
        """Parse gene or drug ID mappings for PharmGKB ids.

        This is used to parse both genes.tsv and drugs.tsv files. Each
        non-blank data line is converted to a row with ``data_to_dict``; when
        the row has an ``id_key`` column, its cross-references are split into
        key/value pairs and stored in a nested dict under
        ``self.key_parsed_ids``.

        :param map_file: genes.tsv (or drugs.tsv) file, containing mappings
        :param pharmgkb_id_col: column containing pharmgkb id, to be used as
            key for map
        :param sep: separator between columns [tab]
        :param id_key: column name that contains ids [Cross-references]
        :param id_sep: separator between each id key:val pair [,]
        :param id_key_val_sep: separator between key:val pair [:]
        :return: dict mapping each row's ``pharmgkb_id_col`` value to that
            row; rows with cross-references also carry the parsed pairs
            under ``self.key_parsed_ids``
        :raises CantFindPharmGKBKey: if ``pharmgkb_id_col`` is not one of the
            header columns
        :raises ValueError: if a non-empty cross-reference item does not
            contain ``id_key_val_sep``
        """
        # defaultdict() without a factory behaves like a plain dict; it is
        # kept so the returned object's type is unchanged for callers.
        id_map: dict = defaultdict()
        with open(map_file) as f:
            # Remove the line terminator before splitting; otherwise the last
            # column name keeps a trailing newline and can never match
            # pharmgkb_id_col or id_key.
            header_items = f.readline().rstrip('\r\n').split(sep)
            if pharmgkb_id_col not in header_items:
                raise CantFindPharmGKBKey(
                    "Can't find PharmGKB id in map file!")
            for line in f:
                stripped = line.strip()
                if not stripped:
                    continue  # blank line carries no record, skip
                items = stripped.split(sep)
                dat = data_to_dict(header_items, items)
                if id_key in dat:
                    for item in dat[id_key].split(id_sep):
                        if not item:
                            continue  # no xrefs, skip
                        item = item.strip('"')  # remove quotes around item
                        # split on the first separator only, so values may
                        # themselves contain the separator
                        key, value = item.split(id_key_val_sep, 1)
                        if self.key_parsed_ids not in dat:
                            dat[self.key_parsed_ids] = dict()
                        dat[self.key_parsed_ids][key] = value
                id_map[dat[pharmgkb_id_col]] = dat
        return id_map