def main():
    """Print the PDB ids of structures solved by neutron or X-ray diffraction.

    Reads an MMTF Hadoop sequence file, keeps entries whose experimental
    method is NEUTRON_DIFFRACTION or X_RAY_DIFFRACTION, and prints their keys.
    """
    path = "../../resources/mmtf_reduced_sample/"

    # Fixed typo in the Spark application name ("FilterByDExperimentalMethods").
    conf = SparkConf().setMaster("local[*]") \
                      .setAppName("FilterByExperimentalMethods")
    sc = SparkContext(conf=conf)

    MmtfReader.readSequenceFile(path, sc) \
        .filter(experimentalMethods(experimentalMethods.NEUTRON_DIFFRACTION,
                                    experimentalMethods.X_RAY_DIFFRACTION)) \
        .keys() \
        .foreach(lambda x: print(x))

    sc.stop()
def main():
    """Count structures that contain DNA or RNA chains but no L-protein
    or D-saccharide chains, and report the result.

    Fixes over the original: the missing line continuation before
    ``.count()`` (a syntax error), the computed count being silently
    discarded (now printed, matching the sibling example scripts), and
    the missing ``sc.stop()`` (SparkContext leak).
    """
    path = "../../resources/mmtf_reduced_sample/"

    conf = SparkConf().setMaster("local[*]") \
                      .setAppName("FilterByPolymerChainType")
    sc = SparkContext(conf=conf)

    count = MmtfReader.readSequenceFile(path, sc) \
        .filter(containsPolymerChainType(containsPolymerChainType.DNA_LINKING,
                                         containsPolymerChainType.RNA_LINKING)) \
        .filter(notFilter(containsLProteinChain())) \
        .filter(notFilter(containsDSaccharideChain())) \
        .count()

    print("Number of structures : " + str(count))
    sc.stop()
def main():
    """Count structures whose R-free value lies in the range [0.0, 2.0]
    and print the total.
    """
    # NOTE(review): the app name says "filterByResolution" but the filter
    # applied below is rFree — one of the two is probably wrong; confirm
    # against the intended example before renaming either.
    path = "../../resources/mmtf_reduced_sample/"

    spark_conf = SparkConf().setMaster("local[*]").setAppName("filterByResolution")
    sc = SparkContext(conf=spark_conf)

    structures = MmtfReader.readSequenceFile(path, sc)
    count = structures.filter(rFree(0.0, 2.0)).count()

    print("Number of structures : " + str(count))
    sc.stop()
def main():
    """Count structures that contain both an ATP and an MG group and
    print the total.

    Fix: the original path literal carried a stray trailing quote
    (``"…/""``), which is a syntax error.
    """
    path = "../../resources/mmtf_reduced_sample/"

    conf = SparkConf().setMaster("local[*]") \
                      .setAppName("FilterByGroup")
    sc = SparkContext(conf=conf)

    count = MmtfReader.readSequenceFile(path, sc) \
        .filter(containsGroup("ATP", "MG")) \
        .count()

    print("Number of structure with ATP + MG : " + str(count))
    sc.stop()
def main():
    """Count structures released between 2000-01-28 and 2017-02-28 and
    print the total.

    Fix: the original path literal carried a stray trailing quote
    (``"…/""``), which is a syntax error.
    """
    path = "../../resources/mmtf_reduced_sample/"

    conf = SparkConf().setMaster("local[*]") \
                      .setAppName("FilterByreleaseDate")
    sc = SparkContext(conf=conf)

    count = MmtfReader.readSequenceFile(path, sc) \
        .filter(releaseDate("2000-01-28", "2017-02-28")) \
        .count()

    print("Number of structure released between 2000-01-28 and 2017-02-28 is : " + str(count))
    sc.stop()
def main():
    """Search PDBj Mine for 'porin' entries and count matching structures,
    first by passing the raw SQL to the filter, then by passing the
    pre-fetched dataset — the two counts should agree.
    """
    path = "../../resources/mmtf_full_sample/"

    spark_conf = SparkConf().setMaster("local[*]").setAppName("keywordSearch")
    sc = SparkContext(conf=spark_conf)

    pdb = MmtfReader.readSequenceFile(path, sc)

    sql = "select pdbid, resolution, biol_species, db_uniprot, db_pfam, hit_score from keyword_search('porin') order by hit_score desc"

    # Variant 1: let the filter run the SQL itself.
    sql_filter = PdbjMine(sql)
    count = pdb.filter(sql_filter).keys().count()
    print(f"Number of entries using sql to filter: {count}")

    # Variant 2: fetch the dataset up front and filter against it.
    dataset = PdbjMineService.getDataset(sql)
    dataset.show(10)
    dataset_filter = PdbjMine(dataset=dataset)
    count = pdb.filter(dataset_filter).keys().count()
    print(f"Number of entries using dataset to filter: {count}")

    sc.stop()
def main():
    """Extract beta-strand ("E") secondary-structure elements of minimum
    length 6 from the protein chains of PDB entry 1STP, show them, and
    report the elapsed wall-clock time.
    """
    start = time.time()

    spark_conf = SparkConf().setMaster("local[*]").setAppName("secondaryStructureElementDemo")
    sc = SparkContext(conf=spark_conf)

    structures = MmtfReader.downloadMmtfFiles(["1STP"], sc).cache()
    chains = structures.flatMap(structureToPolymerChains())
    protein_chains = chains.filter(containsLProteinChain())

    elements = secondaryStructureElementExtractor.getDataset(protein_chains, "E", 6)
    elements.show(50, False)

    end = time.time()
    print("Time: %f sec." % (end - start))
    sc.stop()