コード例 #1
0
ファイル: _ls.py プロジェクト: nh13/gedmatch-tools
def _ls(driver: Optional[WebDriver] = None, status: Optional[List[KitStatus]] = None) -> List[Kit]:
    '''Lists the kits found in the kits table of the page the driver is showing.

    Args:
        driver: an existing web driver to read the page from; when omitted, a driver is
                obtained from `main_page()` and closed again before returning.
        status: return only kits with the given status(es); every kit is returned when omitted.

    Returns:
        the kits parsed from the table rows under `KITS_XPATH`.
    '''
    owns_driver = driver is None
    browser = main_page() if owns_driver else driver

    try:
        document = lxml.html.fromstring(browser.page_source)
        parsed = (kit_from_lxml_row(tr) for tr in document.xpath(KITS_XPATH + '//tr'))
        kits: List[Kit] = [k for k in parsed if status is None or k.status in status]
    except Exception:
        # On failure the driver is closed even when it was supplied by the caller.
        browser.close()
        raise

    # On success only a driver created here is closed; a caller's driver stays open.
    if owns_driver:
        browser.close()

    return kits
コード例 #2
0
def rm(*number: str) -> None:
    '''Removes the kit(s) with the given number(s).

    The kits are deleted one after another through `_rm_impl`, sharing a single set of
    credentials and a single driver. If more than 30 seconds have passed since the previous
    deletion was started, the driver is closed and a fresh one is obtained from
    `main_page()` before continuing.

    Args:
        number: the number of each kit to delete.
    '''
    credentials = Credentials.build()
    driver = main_page()
    total = len(number)
    previous_start = datetime.now()

    for position, kit_number in enumerate(number, 1):
        logging.info(f'Deleting kit: {kit_number} {position}/{total}')

        # Start over with a new driver when the previous deletion took too long
        elapsed = datetime.now() - previous_start
        if elapsed.total_seconds() > 30:
            driver.close()
            driver = main_page()

        previous_start = datetime.now()
        _rm_impl(number=kit_number, credentials=credentials, driver=driver)

    driver.close()
コード例 #3
0
ファイル: one_to_many.py プロジェクト: nh13/gedmatch-tools
def one_to_many_tuples(
    *,
    kits: List[str],
    output_prefix: Path,
    max_matches: Optional[int] = None
) -> List[Optional[List[OneToManyAutosomeResult]]]:
    '''Performs one-to-many autosomal analysis for each of the given kits.

    All kits are analysed with one shared web driver. After each analysis the browser is sent
    back to the home page so that the next kit starts from the same location.

    Args:
        kits: one or more kit names or numbers.
        output_prefix: the path prefix for the output files; the output path for a kit is this
                       prefix with `<kit>.txt` appended directly to it.
        max_matches: the maximum # of matches to return

    Returns:
        one entry per input kit, in input order, holding what the analysis returned for it.
    '''
    # Link that leads back to the home page; the same on every iteration.
    home_link_xpath = '/html/body/table[1]/tbody/tr[2]/td[2]/table/tbody/tr/td[1]/a'

    driver = main_page()

    logging.info('retrieving list of kits.')
    kits_dict: Dict[str, Kit] = {k.number: k for k in ls(driver)}

    results: List[Optional[List[OneToManyAutosomeResult]]] = []
    num_kits = len(kits)
    logging.info(f'processing {num_kits} kits.')
    for i, kit in enumerate(kits, 1):
        output = Path(str(output_prefix) + f'{kit}.txt')
        logging.info(f'processing ({i}/{num_kits}): {kit}: {output}')
        result = one_to_many_api(kit=kit,
                                 output=output,
                                 max_matches=max_matches,
                                 kits=kits_dict,
                                 driver=driver)
        if result is None:
            logging.warning(f'No one-to-many autosomal matches found for kit {kit}.')
        results.append(result)

        # go to home page and wait until it has loaded
        driver.find_element(By.XPATH, home_link_xpath).click()
        WebDriverWait(driver, 90).until(
            expected_conditions.presence_of_element_located(
                (By.XPATH, HOME_PAGE_XPATH)))

    driver.close()

    return results
コード例 #4
0
ファイル: _rm.py プロジェクト: pythseq/gedmatch-tools
def _rm(number: str) -> None:
    '''Removes the kit with the given number.

    Finds the row of the kits table whose first cell equals `number`, opens its kit profile,
    switches to the `#2a` tab, enters the account password, presses "Delete" and accepts the
    confirmation alert.

    Args:
        number: the number of the kit to delete.

    Raises:
        ValueError: if no row of the kits table carries the given kit number.
        Exception: any other error raised while driving the browser. In every failure case
            the page source is printed and the driver is closed before the error propagates.
    '''
    credentials = Credentials.build()

    driver = main_page()

    try:
        kits_xpath = '/html/body/center/table/tbody/tr[2]/td/center/table[1]/tbody/tr/td[1]/' + \
            'table/tbody/tr[4]/td/table/tbody/tr[3]/td/table'
        kits_table = driver.find_element_by_xpath(kits_xpath)
        for row in kits_table.find_elements_by_tag_name('tr'):
            columns = row.find_elements_by_tag_name('td')
            # Skip rows without a kit-number cell and a profile cell (e.g. header rows)
            if len(columns) < 4 or columns[0].text != number:
                continue
            elem = columns[3].find_element_by_css_selector(
                "form[action='KitProfile.php']")
            elem.click()
            break
        else:
            # Do not carry on to the delete form when the kit was never selected
            raise ValueError(f'Kit not found: {number}')

        tab = driver.find_element_by_css_selector("a[href='#2a']")
        tab.click()

        password = driver.find_element_by_css_selector(
            "input[type='PASSWORD'][name='VerifyPassword']")
        password.send_keys(credentials.password)

        submit = driver.find_element_by_css_selector(
            "input[type='SUBMIT'][value='Delete']")
        submit.click()

        # are you sure?
        alert_one = driver.switch_to.alert
        alert_one.accept()
        driver.switch_to.parent_frame()

        logging.info(f'Deleted kit: {number}')

    except Exception:
        # Dump the page to help diagnose what was on screen when the deletion failed
        print(driver.page_source)
        raise
    finally:
        # Runs on success and on failure, even if dumping the page source itself fails
        driver.close()
コード例 #5
0
ファイル: one_to_one.py プロジェクト: nh13/gedmatch-tools
def one_to_one_tuples(
        *, kits: List[str],
        output_prefix: Path) -> List[Optional[OneToOneAutosomeResult]]:
    '''Runs a one-to-one autosomal comparison for every requested pair of kits.

    Args:
        kits: one or more pairs of kit names or numbers, each pair given as a single
              comma-separated string.
        output_prefix: the path prefix for the output files; each pair uses
                       `<prefix>.<kit_one>-<kit_two>` as its own prefix and the combined
                       summary goes to `<prefix>.summary.txt`.

    Returns:
        the result of each comparison, in input order.
    '''
    driver = main_page()

    logging.info('retrieving list of kits.')
    known_kits: Dict[str, Kit] = {k.number: k for k in ls(driver)}

    total = len(kits)
    logging.info(f'processing {total} kit pairs.')

    results: List[Optional[OneToOneAutosomeResult]] = []
    for index, pair in enumerate(kits, 1):
        first, second = pair.split(',')
        logging.info(f'processing ({index}/{total}) kits {first} and {second}.')

        comparison = one_to_one_api(
            kit_one=first,
            kit_two=second,
            output_prefix=Path(str(output_prefix) + f'.{first}-{second}'),
            kits=known_kits,
            driver=driver)
        if comparison is None:
            logging.warning(f'No 1:1 autosomal match found for kits {first} and {second}.')
        results.append(comparison)

    logging.info('writing output.')
    write_metrics(Path(str(output_prefix) + '.summary.txt'), results)

    driver.close()

    return results
コード例 #6
0
ファイル: _ls.py プロジェクト: pythseq/gedmatch-tools
def _ls() -> List[Kit]:
    '''Returns the kits available on GEDMatch.com

    Opens the main page, reads the kits table and builds one `Kit` per table row, taking the
    kit number from the first cell and the kit name from the second.

    Returns:
        the kits in the order their rows appear in the table.
    '''
    kits_xpath = '/html/body/center/table/tbody/tr[2]/td/center/table[1]/tbody/tr/td[1]' + \
                 '/table/tbody/tr[4]/td/table/tbody/tr[3]/td/table'

    kits: List[Kit] = []

    driver = main_page()

    try:
        kits_table = driver.find_element_by_xpath(kits_xpath)
        for row in kits_table.find_elements_by_tag_name('tr'):
            columns = row.find_elements_by_tag_name('td')
            kits.append(Kit(name=columns[1].text, number=columns[0].text))
    finally:
        # Always release the browser, including on KeyboardInterrupt/SystemExit
        driver.close()

    return kits
コード例 #7
0
ファイル: _add.py プロジェクト: nh13/gedmatch-tools
def _add(genotypes: Path,
         name: str,
         raw_data_type: Optional[RawDataType] = None,
         fam: Optional[Path] = None) -> Kit:
    '''Performs a generic upload of the given genotype.

    The sample information when given will be used to determine the sex of the donor, otherwise
    it will default to female.

    Args:
        genotypes: the path to the genotype file.
        name: the name of the donor; also the key used to look the donor up in `fam`.
        raw_data_type: optionally the raw data type to select; the option with value 6 is
                       selected when omitted.
        fam: optionally a PLINK sample information file; see the following link
             https://www.cog-genomics.org/plink2/formats#fam

    Returns:
        the kit created by GEDMatch

    Raises:
        ValueError: if the result page does not report an assigned kit number.
        Exception: if the kit could not be uploaded. In every failure case the page source
            is printed and the driver is closed before the error propagates.
    '''
    fam_dict = _read_fam(fam) if fam is not None else {}

    driver = main_page()

    try:
        url = 'v_upload1.phpnf'
        driver.find_element(By.XPATH, '//a[@href="' + url + '"]').click()

        in_name = driver.find_element(By.NAME, 'name')
        in_name.clear()
        in_name.send_keys(name)

        # Male only when the sample information lists the donor with a truthy sex value
        male = name in fam_dict and bool(fam_dict[name].sex)
        sex_value = 'M' if male else 'F'
        driver.find_element(
            By.CSS_SELECTOR,
            "input[type='radio'][value='" + sex_value + "'][name='sex']").click()

        in_source = Select(driver.find_element(By.NAME, 'source'))
        in_source.select_by_index(21)  # other

        raw_data_type_value = str(
            6 if raw_data_type is None else raw_data_type.value)
        driver.find_element(
            By.CSS_SELECTOR,
            "input[type='radio'][value='" + raw_data_type_value +
            "'][name='auth']").click()

        driver.find_element(
            By.CSS_SELECTOR,
            "input[type='radio'][value='Y'][name='public2']").click()

        # File inputs take the path typed into them; resolve it to an absolute path first
        in_file = driver.find_element(
            By.CSS_SELECTOR, "input[type='file'][name='GedcomFile']")
        in_file.send_keys(str(genotypes.resolve()))

        driver.find_element(
            By.CSS_SELECTOR, "input[type='submit'][name='gedsubmit']").click()

        # The upload is finished once the link back to the home page is visible
        wait_for_link_text = 'Click here to get to Home'
        WebDriverWait(driver, 90).until(
            expected_conditions.visibility_of_element_located(
                (By.LINK_TEXT, wait_for_link_text)))

        # The kit number is the text after the last '>' on the "Assigned kit number:" line
        for line in driver.page_source.split('\n'):
            if 'Assigned kit number:' in line:
                kit_number = line.strip().replace('</font>', '').split('>')[-1]
                break
        else:
            raise ValueError('No kit number returned by GEDmatch.')

    except Exception:
        # Dump the page to help diagnose what was on screen when the upload failed
        print(driver.page_source)
        raise
    finally:
        # Runs on success and on failure, even if dumping the page source itself fails
        driver.close()

    return Kit(name=name, number=kit_number)
コード例 #8
0
ファイル: _one_to_many.py プロジェクト: nh13/gedmatch-tools
def _one_to_many(
        kit: str,
        output: Path,
        max_matches: Optional[int],
        kits: Optional[Dict[str, Kit]] = None,
        driver: Optional[WebDriver] = None) -> List['OneToManyAutosomeResult']:
    '''Performs one-to-many autosomal analysis.

    Args:
        kit: the name or number
        output: the output file.
        max_matches: the maximum # of matches to return; all rows are read when None.
        kits: a mapping of kit number to kit, useful when performing many analyses; it is
              built from `_ls` when omitted.
        driver: an existing web driver to use; when omitted, a driver is obtained from
                `main_page()` and closed again before returning.

    Returns:
        A list of matches.
    '''
    results: List['OneToManyAutosomeResult'] = []

    _driver = main_page() if driver is None else driver

    if kits is None:
        kits = {k.number: k for k in _ls(_driver)}

    if kit not in kits:
        # Not a known kit number, so treat it as a name and use the first kit with that name
        kit = [n for n, k in kits.items() if k.name == kit][0]

    try:
        url = f'OneToMany0Tier2.php?kit_num={kit}'
        page = _driver.find_element_by_xpath('//a[@href="' + url + '"]')
        page.click()

        root = lxml.html.fromstring(_driver.page_source)
        for table in root.xpath("//table"):
            rows = table.xpath('.//tr')
            if not rows:
                # a table without rows cannot be the results table
                continue

            # check that the first column in the first row has value "Kit"
            tds = rows[0].xpath('.//td')
            if not tds or tds[0].text != 'Kit':
                continue
            header = [td.text for td in tds]
            assert len(header) == 10, f'header: {header}'

            # Row 0 is the header, so `max_matches` data rows end at index max_matches + 1
            last_row: int = len(
                rows) if max_matches is None else max_matches + 1
            logging.info(f'Reading rows for {kit}')
            for row in rows[1:last_row]:
                columns = [maybe_href(td) for td in row.xpath('.//td')]
                assert len(columns) == 10, f'columns: {columns}'
                d = dict(zip(header, columns))

                result = OneToManyAutosomeResult(
                    kit_one=kits[kit],
                    kit_two=Kit(name=d['Name'],
                                number=d['Kit'],
                                email=d['Email'],
                                testing_company=d['Testing Company']),
                    largest_segment=float(d['Largest Seg']),
                    total_half_match_segments=float(d['Total cM']),
                    most_recent_common_ancestor=float(d['Gen']),
                    num_snps=int(d['Overlap']),
                    date_compared=d['Date Compared'])
                results.append(result)
            logging.info(f'Returning {len(results)} results.')
    except Exception:
        # On failure the driver is closed even when it was supplied by the caller
        _driver.close()
        raise

    # On success only a driver created here is closed
    if driver is None:
        _driver.close()

    write_metrics(output, results)

    return results
コード例 #9
0
def _one_to_one(
        kit_one: str,
        kit_two: str,
        output_prefix: Path,
        kits: Optional[Dict[str, Kit]] = None,
        driver: Optional[WebDriver] = None
) -> Optional['OneToOneAutosomeResult']:
    '''Performs one-to-one autosomal analysis.

    Submits the two kits to the `v_compare1.php` form, reads the per-segment table (the one
    whose header starts with "Chr") and the summary lines from the result page, and writes
    `<output_prefix>.summary.txt` and `<output_prefix>.detailed.txt`.

    Args:
        kit_one: the first kit name or number
        kit_two: the second kit name or number
        output_prefix: the prefix for the output files.
        kits: a mapping of kit number to kit, useful when performing many 1:1 analyses; it is
              built from `_ls` when omitted.
        driver: an existing web driver to use; when omitted, a driver is obtained from
                `main_page()` and closed again before returning.

    Returns:
        The analysis results. Summary values that do not appear on the page keep their
        defaults (no segments, zeros, a most recent common ancestor of -1.0, version
        'none found'). None would only be returned if a summary value were None, which the
        current defaults rule out.
    '''
    _driver = main_page() if driver is None else driver

    if kits is None:
        kits = {k.number: k for k in _ls(_driver)}

    # Anything that is not a known kit number is treated as a name: use the first kit with it
    if kit_one not in kits:
        kit_one = [n for n, k in kits.items() if k.name == kit_one][0]
    if kit_two not in kits:
        kit_two = [n for n, k in kits.items() if k.name == kit_two][0]

    try:
        url = 'v_compare1.php'
        page = _driver.find_element_by_xpath('//a[@href="' + url + '"]')
        page.click()

        kit1 = _driver.find_element_by_name('kit1')
        kit1.clear()
        kit1.send_keys(kit_one)

        kit2 = _driver.find_element_by_name('kit2')
        kit2.clear()
        kit2.send_keys(kit_two)

        submit = _driver.find_element_by_name('xsubmit')
        submit.click()

        segments: List[SegmentResult] = []
        root = lxml.html.fromstring(_driver.page_source)
        for table in root.xpath("//table"):
            rows = table.xpath('.//tr')
            if not rows:
                # a table without rows cannot be the segment table
                continue

            # check that the first column in the first row has value "Chr"
            tds = rows[0].xpath('.//td')
            if not tds or tds[0].text != 'Chr':
                continue

            for row in rows[1:]:
                columns = row.xpath('.//td')
                assert len(columns) == 5

                segment = SegmentResult(
                    chromosome=columns[0].text,
                    start=_comma_value_to_int(columns[1].text),
                    end=_comma_value_to_int(columns[2].text),
                    centimorgans=float(columns[3].text),
                    num_snps=_comma_value_to_int(columns[4].text))
                segments.append(segment)

        # Defaults used for any summary line that is missing from the page
        largest_segment: float = 0.0
        total_half_match_segments: float = 0.0
        pct_half_match_segments: float = 0.0
        most_recent_common_ancestor: float = -1.0
        shared_segments: int = 0
        num_snps: int = 0
        pct_snps_identical: float = 0.0
        version: str = 'none found'

        # Each summary value sits on its own line; the first matching marker wins per line
        for line in _driver.page_source.split('\n'):
            line = line.strip().replace('<br>', '')

            if 'No shared DNA segments found' in line:
                assert len(segments) == 0
            elif 'Largest segment' in line:
                largest_segment = float(line.split(' = ')[1].split(' ')[0])
            elif 'Total Half-Match segments' in line:
                # value is followed by a unit token and then the percentage in parentheses
                fields = line.split(' = ')[1].split(' ')
                total_half_match_segments = float(fields[0])
                pct_half_match_segments = float(
                    fields[2].replace('(', '').replace(')', ''))
            elif 'Estimated number of generations to MRCA' in line:
                most_recent_common_ancestor = float(
                    line.split(' = ')[1].split(' ')[0])
            elif 'shared segments found for this comparison' in line:
                shared_segments = int(line.split(' ')[0])
            elif 'SNPs used for this comparison' in line:
                num_snps = int(line.split(' ')[0])
            elif 'Pct SNPs are full identical' in line:
                pct_snps_identical = float(line.split(' ')[0])
            elif 'Ver:' in line:
                version = line.replace('<font size="2">Ver: ', '') \
                    .replace('</font>', '') \
                    .replace(' ', '-')

        final_result = None
        summary_values = [
            largest_segment, total_half_match_segments,
            pct_half_match_segments, most_recent_common_ancestor,
            shared_segments, num_snps, pct_snps_identical, version
        ]
        # NOTE(review): every value starts from a non-None default and is only reassigned to
        # a float/int/str, so this check always passes and a result is built even when no
        # segments were found. Kept as is because callers rely on the current results.
        if all(v is not None for v in summary_values):
            final_result = OneToOneAutosomeResult(
                kit_one=kits[kit_one],
                kit_two=kits[kit_two],
                segments=segments,
                largest_segment=largest_segment,
                total_half_match_segments=total_half_match_segments,
                pct_half_match_segments=pct_half_match_segments,
                most_recent_common_ancestor=most_recent_common_ancestor,
                shared_segments=shared_segments,
                num_snps=num_snps,
                pct_snps_identical=pct_snps_identical,
                version=version)

    except Exception:
        # On failure the driver is closed even when it was supplied by the caller
        _driver.close()
        raise

    # On success only a driver created here is closed
    if driver is None:
        _driver.close()

    if final_result is not None:
        summary: Path = Path(str(output_prefix) + '.summary.txt')
        write_metric(summary, final_result)
        detailed: Path = Path(str(output_prefix) + '.detailed.txt')
        write_metrics(detailed, final_result.segments)

    return final_result