def _ls(driver: Optional[WebDriver] = None,
        status: Optional[List[KitStatus]] = None) -> List[Kit]:
    '''Lists the kits found in the kits table of the page held by the driver.

    Args:
        driver: an existing web driver whose current page is parsed; when omitted, a driver is
                obtained from `main_page()` and closed before returning.
        status: when given, only kits whose status is contained in this list are returned.

    Returns:
        the kits built from the rows of the kits table, in document order.
    '''
    owns_driver = driver is None
    browser = main_page() if owns_driver else driver
    try:
        document = lxml.html.fromstring(browser.page_source)
        # Lazily turn each table row into a kit so parsing and filtering stay interleaved.
        parsed = (kit_from_lxml_row(tr) for tr in document.xpath(KITS_XPATH + '//tr'))
        selected: List[Kit] = [
            kit for kit in parsed if status is None or kit.status in status
        ]
    except Exception as exc:
        # Any failure closes the driver, including one supplied by the caller.
        browser.close()
        raise exc
    if owns_driver:
        browser.close()
    return selected
def rm(*number: str) -> None:
    '''Removes the kit(s) with the given number(s), one after another.

    All removals share one driver obtained from `main_page()`.  Before each removal, if more
    than 30 seconds have passed since that driver was obtained, it is closed and replaced by a
    new one.

    Args:
        number: the number(s) of the kit(s) to remove.
    '''
    credentials = Credentials.build()
    driver = main_page()
    session_started = datetime.now()
    total = len(number)
    for position, kit_number in enumerate(number, 1):
        logging.info(f'Deleting kit: {kit_number} {position}/{total}')
        elapsed_seconds = (datetime.now() - session_started).total_seconds()
        if elapsed_seconds > 30:
            # The session has been open too long: log in again with a fresh driver.
            driver.close()
            driver = main_page()
            session_started = datetime.now()
        _rm_impl(number=kit_number, credentials=credentials, driver=driver)
    driver.close()
def one_to_many_tuples(
        *, kits: List[str], output_prefix: Path, max_matches: Optional[int] = None
) -> List[Optional[List[OneToManyAutosomeResult]]]:
    '''Performs one-to-many autosomal analysis for each of the given kits.

    A single driver from `main_page()` is shared by all analyses; after each kit the home-page
    link is clicked and the function waits (up to 90 seconds) for the home page to be present
    before moving on.

    Args:
        kits: one or more kit names or numbers.
        output_prefix: the path prefix for the output files.  The output for a kit is written
                       to the prefix immediately followed by `<kit>.txt` (no separator is
                       inserted between the prefix and the kit).
        max_matches: the maximum # of matches to return

    Returns:
        one entry per input kit, in input order, holding whatever `one_to_many_api` returned
        for that kit (which may be None).
    '''
    # Link back to the home page; constant, so built once rather than per kit.
    home_link_xpath = '/html/body/table[1]/tbody/tr[2]/td[2]/table/tbody/tr/td[1]/a'

    driver = main_page()

    logging.info('retrieving list of kits.')
    kits_dict: Dict[str, Kit] = {kit.number: kit for kit in ls(driver)}

    results: List[Optional[List[OneToManyAutosomeResult]]] = []
    logging.info(f'processing {len(kits)} kits.')
    for i, kit in enumerate(kits, 1):
        output = Path(str(output_prefix) + f'{kit}.txt')
        logging.info(f'processing ({i}/{len(kits)}): {kit}: {output}')
        result = one_to_many_api(kit=kit,
                                 output=output,
                                 max_matches=max_matches,
                                 kits=kits_dict,
                                 driver=driver)
        if result is None:
            logging.warning(f'No 1:many autosomal matches found for kit {kit}.')
        results.append(result)

        # go to home page so the next analysis starts from a known location
        driver.find_element_by_xpath(home_link_xpath).click()
        WebDriverWait(driver, 90).until(
            expected_conditions.presence_of_element_located(
                (By.XPATH, HOME_PAGE_XPATH)))

    driver.close()
    return results
def _rm(number: str) -> None:
    '''Removes the kit with the given number.

    Opens the main page, finds the row of the kits table whose first column equals `number`,
    opens that kit's profile, and submits the delete form using the password from
    `Credentials.build()`, accepting the confirmation alert.

    Args:
        number: the number of the kit to remove.

    Raises:
        ValueError: if no row of the kits table has the given kit number.
        Exception: any failure is re-raised after the page source has been printed and the
                   driver closed.
    '''
    credentials = Credentials.build()
    driver = main_page()

    try:
        kits_xpath = '/html/body/center/table/tbody/tr[2]/td/center/table[1]/tbody/tr/td[1]/' + \
            'table/tbody/tr[4]/td/table/tbody/tr[3]/td/table'
        kits_table = driver.find_element_by_xpath(kits_xpath)
        for row in kits_table.find_elements_by_tag_name('tr'):
            columns = row.find_elements_by_tag_name('td')
            if columns[0].text != number:
                continue
            elem = columns[3].find_element_by_css_selector(
                "form[action='KitProfile.php']")
            elem.click()
            break
        else:
            # Without this, the delete steps below would run against the wrong page and fail
            # with an element-lookup error that does not mention the missing kit.
            raise ValueError(f'No kit found with number: {number}')

        tab = driver.find_element_by_css_selector("a[href='#2a']")
        tab.click()

        password = driver.find_element_by_css_selector(
            "input[type='PASSWORD'][name='VerifyPassword']")
        password.send_keys(credentials.password)

        submit = driver.find_element_by_css_selector(
            "input[type='SUBMIT'][value='Delete']")
        submit.click()

        # are you sure?
        alert_one = driver.switch_to.alert
        alert_one.accept()
        driver.switch_to.parent_frame()

        logging.info(f'Deleted kit: {number}')
    except Exception as e:
        # Dump the page to help diagnose layout changes before giving up.
        print(driver.page_source)
        driver.close()
        raise e

    driver.close()
def one_to_one_tuples(
        *, kits: List[str], output_prefix: Path) -> List[Optional[OneToOneAutosomeResult]]:
    '''Runs a one-to-one autosomal comparison for every given pair of kits.

    All comparisons share one driver from `main_page()`.  Once every pair has been processed,
    the collected results are written to `<output_prefix>.summary.txt`.

    Args:
        kits: one or more kit pairs, each written as two kit names or numbers separated by a
              comma.
        output_prefix: the path prefix for the output files; the per-pair prefix is
                       `<output_prefix>.<first>-<second>`.

    Returns:
        one entry per input pair, in input order, holding whatever `one_to_one_api` returned
        for that pair (which may be None).
    '''
    driver = main_page()

    logging.info('retrieving list of kits.')
    kits_dict: Dict[str, Kit] = {kit.number: kit for kit in ls(driver)}

    total = len(kits)
    logging.info(f'processing {total} kit pairs.')

    results: List[Optional[OneToOneAutosomeResult]] = []
    for position, pair in enumerate(kits, 1):
        first, second = pair.split(',')
        logging.info(f'processing ({position}/{total}) kits {first} and {second}.')
        pair_prefix = Path(str(output_prefix) + f'.{first}-{second}')
        outcome = one_to_one_api(kit_one=first,
                                 kit_two=second,
                                 output_prefix=pair_prefix,
                                 kits=kits_dict,
                                 driver=driver)
        if outcome is None:
            logging.warning(f'No 1:1 autosomal match found for kits {first} and {second}.')
        results.append(outcome)

    logging.info('writing output.')
    write_metrics(Path(str(output_prefix) + '.summary.txt'), results)

    driver.close()
    return results
def _ls() -> List[Kit]:
    '''Lists the kits shown in the kits table of the main page.

    A driver is obtained from `main_page()` and is closed before returning, whether or not
    reading the table succeeds.

    Returns:
        one kit per table row, named from the row's second column and numbered from its first.
    '''
    driver = main_page()
    try:
        table_xpath = ('/html/body/center/table/tbody/tr[2]/td/center/table[1]/tbody/tr/td[1]'
                       '/table/tbody/tr[4]/td/table/tbody/tr[3]/td/table')
        table = driver.find_element_by_xpath(table_xpath)
        # Fetch each row's cells, then build the kit from the first two of them.
        cells_per_row = (tr.find_elements_by_tag_name('td')
                         for tr in table.find_elements_by_tag_name('tr'))
        found: List[Kit] = [
            Kit(name=cells[1].text, number=cells[0].text) for cells in cells_per_row
        ]
    except Exception as exc:
        driver.close()
        raise exc
    driver.close()
    return found
def _add(genotypes: Path,
         name: str,
         raw_data_type: Optional[RawDataType] = None,
         fam: Optional[Path] = None) -> Kit:
    '''Uploads the given genotype file through the generic upload form.

    The male option is selected only when the sample information has an entry for `name`
    whose `sex` is truthy; in every other case the female option is selected.

    Args:
        genotypes: the path to the genotype file.
        name: the name of the donor.
        raw_data_type: optionally the raw data type to select; the option with value 6 is
                       selected when omitted.
        fam: optionally a PLINK sample information file; see the following link
             https://www.cog-genomics.org/plink2/formats#fam

    Returns:
        a kit with the given name and the kit number reported on the result page.

    Raises:
        ValueError: if the result page has no line reporting an assigned kit number.
        Exception: any failure is re-raised after the page source has been printed and the
                   driver closed.
    '''
    samples = _read_fam(fam) if fam is not None else {}
    driver = main_page()

    try:
        # Navigate to the upload form.
        upload_href = 'v_upload1.phpnf'
        driver.find_element_by_xpath('//a[@href="' + upload_href + '"]').click()

        # Donor name.
        name_box = driver.find_element_by_name('name')
        name_box.clear()
        name_box.send_keys(name)

        # Donor sex.
        is_male = name in samples and bool(samples[name].sex)
        sex_value = 'M' if is_male else 'F'
        driver.find_element_by_css_selector(
            f"input[type='radio'][value='{sex_value}'][name='sex']").click()

        # Data source.
        Select(driver.find_element_by_name('source')).select_by_index(21)  # other

        # Raw data type.
        auth_value = str(6 if raw_data_type is None else raw_data_type.value)
        driver.find_element_by_css_selector(
            "input[type='radio'][value='" + auth_value + "'][name='auth']").click()

        # Public option.
        driver.find_element_by_css_selector(
            "input[type='radio'][value='Y'][name='public2']").click()

        # Genotype file, given by its resolved path.
        driver.find_element_by_css_selector(
            "input[type='file'][name='GedcomFile']").send_keys(str(genotypes.resolve()))

        driver.find_element_by_css_selector(
            "input[type='submit'][name='gedsubmit']").click()

        # Wait (up to 90 seconds) until the link back to the home page is visible.
        home_link_text = 'Click here to get to Home'
        WebDriverWait(driver, 90).until(
            expected_conditions.visibility_of_element_located(
                (By.LINK_TEXT, home_link_text)))

        # The kit number is the text after the last '>' on the first line that reports it.
        reporting_line = next(
            (text for text in driver.page_source.split('\n')
             if 'Assigned kit number:' in text),
            None)
        if reporting_line is None:
            raise ValueError('No kit number returned by GEDmatch.')
        kit_number: str = reporting_line.strip().replace('</font>', '').split('>')[-1]
    except Exception as exc:
        print(driver.page_source)
        driver.close()
        raise exc

    driver.close()
    return Kit(name=name, number=kit_number)
def _one_to_many(
        kit: str,
        output: Path,
        max_matches: Optional[int],
        kits: Optional[Dict[str, Kit]] = None,
        driver: Optional[WebDriver] = None) -> List['OneToManyAutosomeResult']:
    '''Performs one-to-many autosomal analysis.

    Args:
        kit: the name or number
        output: the output file.
        max_matches: the maximum # of matches to return
        kits: a mapping of kit number to kit, useful when performing many analyses; when
              omitted it is built from `_ls`.
        driver: an existing web driver to use; when omitted a driver is obtained from
                `main_page()` and closed before the results are written.

    Returns:
        A list of matches, which is also written to `output`.

    Raises:
        IndexError: if `kit` is neither a key of `kits` nor the name of one of its kits.
        Exception: any failure while resolving the kit or reading the page is re-raised after
                   the driver (including one supplied by the caller) has been closed.
    '''
    results: List['OneToManyAutosomeResult'] = []

    _driver = main_page() if driver is None else driver

    if kits is None:
        kits = {k.number: k for k in _ls(_driver)}

    try:
        # Resolve a kit name to its number.  Done inside the try so that a failed lookup
        # releases the driver like every other failure in this function.
        if kit not in kits:
            kit = [n for n, k in kits.items() if k.name == kit][0]

        url = f'OneToMany0Tier2.php?kit_num={kit}'
        page = _driver.find_element_by_xpath('//a[@href="' + url + '"]')
        page.click()

        root = lxml.html.fromstring(_driver.page_source)
        for table in root.xpath("//table"):
            rows = table.xpath('.//tr')
            if not rows:
                # A table without rows cannot be the results table.
                continue
            # check that the first column in the first row has value "Kit"
            tds = rows[0].xpath('.//td')
            if not tds or tds[0].text != 'Kit':
                continue
            header = [td.text for td in tds]
            assert len(header) == 10, f'header: {header}'
            # Row 0 is the header, so max_matches data rows end at index max_matches + 1.
            last_row: int = len(rows) if max_matches is None else max_matches + 1
            logging.info(f'Reading rows for {kit}')
            for row in rows[1:last_row]:
                columns = [maybe_href(td) for td in row.xpath('.//td')]
                assert len(columns) == 10, f'columns: {columns}'
                d = dict(zip(header, columns))
                result = OneToManyAutosomeResult(
                    kit_one=kits[kit],
                    kit_two=Kit(name=d['Name'],
                                number=d['Kit'],
                                email=d['Email'],
                                testing_company=d['Testing Company']),
                    largest_segment=float(d['Largest Seg']),
                    total_half_match_segments=float(d['Total cM']),
                    most_recent_common_ancestor=float(d['Gen']),
                    num_snps=int(d['Overlap']),
                    date_compared=d['Date Compared'])
                results.append(result)
        logging.info(f'Returning {len(results)} results.')
    except Exception as e:
        _driver.close()
        raise e

    if driver is None:
        _driver.close()

    write_metrics(output, results)

    return results
def _one_to_one(
        kit_one: str,
        kit_two: str,
        output_prefix: Path,
        kits: Optional[Dict[str, Kit]] = None,
        driver: Optional[WebDriver] = None
) -> Optional['OneToOneAutosomeResult']:
    '''Performs one-to-one autosomal analysis.

    Args:
        kit_one: the first kit name or number
        kit_two: the second kit name or number
        output_prefix: the prefix for the output files; the result is written to
                       `<output_prefix>.summary.txt` and its segments to
                       `<output_prefix>.detailed.txt`.
        kits: a mapping of kit number to kit, useful when performing many 1:1 analyses; when
              omitted it is built from `_ls`.
        driver: an existing web driver to use; when omitted a driver is obtained from
                `main_page()` and closed before the results are written.

    Returns:
        The analysis results.  NOTE(review): the return type allows None, but every summary
        field below starts from a non-None default, so the completeness check always passes
        and a result is returned even when the page reports no shared segments; confirm
        whether None was intended for that case.

    Raises:
        IndexError: if a kit is neither a key of `kits` nor the name of one of its kits.
        Exception: any failure while resolving the kits or reading the page is re-raised
                   after the driver (including one supplied by the caller) has been closed.
    '''
    _driver = main_page() if driver is None else driver

    if kits is None:
        kits = {k.number: k for k in _ls(_driver)}

    try:
        # Resolve kit names to numbers.  Done inside the try so that a failed lookup releases
        # the driver like every other failure in this function.
        if kit_one not in kits:
            kit_one = [n for n, k in kits.items() if k.name == kit_one][0]
        if kit_two not in kits:
            kit_two = [n for n, k in kits.items() if k.name == kit_two][0]

        url = 'v_compare1.php'
        page = _driver.find_element_by_xpath('//a[@href="' + url + '"]')
        page.click()

        kit1 = _driver.find_element_by_name('kit1')
        kit1.clear()
        kit1.send_keys(kit_one)

        kit2 = _driver.find_element_by_name('kit2')
        kit2.clear()
        kit2.send_keys(kit_two)

        submit = _driver.find_element_by_name('xsubmit')
        submit.click()

        # Per-segment rows come from the table whose first header cell is "Chr".
        segments: List[SegmentResult] = []
        root = lxml.html.fromstring(_driver.page_source)
        for table in root.xpath("//table"):
            rows = table.xpath('.//tr')
            if not rows:
                # A table without rows cannot be the segments table.
                continue
            # check that the first column in the first row has value "Chr"
            tds = rows[0].xpath('.//td')
            if not tds or tds[0].text != 'Chr':
                continue
            for row in rows[1:]:
                columns = row.xpath('.//td')
                assert len(columns) == 5
                segment = SegmentResult(
                    chromosome=columns[0].text,
                    start=_comma_value_to_int(columns[1].text),
                    end=_comma_value_to_int(columns[2].text),
                    centimorgans=float(columns[3].text),
                    num_snps=_comma_value_to_int(columns[4].text))
                segments.append(segment)

        # Summary values, overwritten by whichever summary lines the page contains.
        largest_segment: float = 0.0
        total_half_match_segments: float = 0.0
        pct_half_match_segments: float = 0.0
        most_recent_common_ancestor: float = -1.0
        shared_segments: int = 0
        num_snps: int = 0
        pct_snps_identical: float = 0.0
        version: str = 'none found'
        for line in _driver.page_source.split('\n'):
            line = line.rstrip('\r\n').strip()
            line = line.replace('<br>', '')
            if 'No shared DNA segments found' in line:
                assert len(segments) == 0
            elif 'Largest segment' in line:
                largest_segment = float(line.split(' = ')[1].split(' ')[0])
            elif 'Total Half-Match segments' in line:
                fields = line.split(' = ')[1].split(' ')
                total_half_match_segments = float(fields[0])
                # The percentage is the third field, wrapped in parentheses.
                pct_half_match_segments = float(fields[2].replace('(', '').replace(
                    ')', ''))
            elif 'Estimated number of generations to MRCA' in line:
                most_recent_common_ancestor = float(
                    line.split(' = ')[1].split(' ')[0])
            elif 'shared segments found for this comparison' in line:
                shared_segments = int(line.split(' ')[0])
            elif 'SNPs used for this comparison' in line:
                num_snps = int(line.split(' ')[0])
            elif 'Pct SNPs are full identical' in line:
                pct_snps_identical = float(line.split(' ')[0])
            elif 'Ver:' in line:
                version = line.replace('<font size="2">Ver: ', '') \
                    .replace('</font>', '') \
                    .replace(' ', '-')

        final_result = None
        summary_values = [
            largest_segment, total_half_match_segments,
            pct_half_match_segments, most_recent_common_ancestor,
            shared_segments, num_snps, pct_snps_identical, version
        ]
        # NOTE(review): always true, because every value above has a non-None default.
        if all(value is not None for value in summary_values):
            final_result = OneToOneAutosomeResult(
                kit_one=kits[kit_one],
                kit_two=kits[kit_two],
                segments=segments,
                largest_segment=largest_segment,
                total_half_match_segments=total_half_match_segments,
                pct_half_match_segments=pct_half_match_segments,
                most_recent_common_ancestor=most_recent_common_ancestor,
                shared_segments=shared_segments,
                num_snps=num_snps,
                pct_snps_identical=pct_snps_identical,
                version=version)
    except Exception as e:
        _driver.close()
        raise e

    if driver is None:
        _driver.close()

    if final_result is not None:
        summary: Path = Path(str(output_prefix) + '.summary.txt')
        write_metric(summary, final_result)

        detailed: Path = Path(str(output_prefix) + '.detailed.txt')
        write_metrics(detailed, final_result.segments)

    return final_result