Example #1
0
def main():
    """Read an MMTF sequence file and print the IDs of structures solved by
    neutron diffraction or X-ray diffraction.

    NOTE(review): the app name string contains an apparent typo
    ("FilterByDExperimentalMethods") — preserved as-is here; confirm intent.
    """
    path = "../../resources/mmtf_reduced_sample/"

    # Configure a local Spark context using all available cores.
    conf = SparkConf() \
        .setMaster("local[*]") \
        .setAppName("FilterByDExperimentalMethods")
    sc = SparkContext(conf=conf)

    # Keep only entries solved by either of the two experimental methods,
    # then print each structure ID.
    methods_filter = experimentalMethods(
        experimentalMethods.NEUTRON_DIFFRACTION,
        experimentalMethods.X_RAY_DIFFRACTION,
    )
    structures = MmtfReader.readSequenceFile(path, sc).filter(methods_filter)
    structures.keys().foreach(lambda structure_id: print(structure_id))

    sc.stop()
def main():
    """Count structures that contain DNA- and RNA-linking polymer chains
    but contain neither an L-protein chain nor a D-saccharide chain.
    """
    path = "../../resources/mmtf_reduced_sample/"

    conf = SparkConf().setMaster("local[*]") \
                      .setAppName("FilterByPolymerChainType")
    sc = SparkContext(conf=conf)

    # BUG FIX: the original chain was missing the line-continuation
    # backslash before .count(), which made the block a SyntaxError.
    count = MmtfReader.readSequenceFile(path, sc) \
                      .filter(containsPolymerChainType(containsPolymerChainType.DNA_LINKING, containsPolymerChainType.RNA_LINKING)) \
                      .filter(notFilter(containsLProteinChain())) \
                      .filter(notFilter(containsDSaccharideChain())) \
                      .count()

    # Report the result (the original computed count but never used it).
    print("Number of structures : " + str(count))

    # FIX: stop the SparkContext so resources are released, matching the
    # other examples in this file.
    sc.stop()
Example #3
0
def main():
    """Count structures whose rFree value lies in the range [0.0, 2.0]
    and print the total.

    NOTE(review): the app name says "filterByResolution" but the filter
    applied is rFree — confirm which was intended.
    """
    path = "../../resources/mmtf_reduced_sample/"

    # Local Spark context over all cores.
    conf = SparkConf() \
        .setMaster("local[*]") \
        .setAppName("filterByResolution")
    sc = SparkContext(conf=conf)

    # Load, filter by rFree range, and count the surviving entries.
    pdb = MmtfReader.readSequenceFile(path, sc)
    count = pdb.filter(rFree(0.0, 2.0)).count()

    print("Number of structures : " + str(count))

    sc.stop()
Example #4
0
def main():
    """Count structures containing both an ATP and an MG group and print
    the total.
    """
    # BUG FIX: the original line ended with a stray extra quote
    # ("...sample/"") which left the string unterminated (SyntaxError).
    path = "../../resources/mmtf_reduced_sample/"

    conf = SparkConf().setMaster("local[*]") \
                      .setAppName("FilterByGroup")
    sc = SparkContext(conf=conf)

    # Keep only entries that contain both chemical groups.
    count = MmtfReader.readSequenceFile(path, sc) \
                      .filter(containsGroup("ATP", "MG")) \
                      .count()

    print("Number of structure with ATP + MG : " +
          str(count))

    sc.stop()
def main():
    """Count structures released between 2000-01-28 and 2017-02-28 and
    print the total.
    """
    # BUG FIX: the original line ended with a stray extra quote
    # ("...sample/"") which left the string unterminated (SyntaxError).
    path = "../../resources/mmtf_reduced_sample/"

    conf = SparkConf().setMaster("local[*]") \
                      .setAppName("FilterByreleaseDate")
    sc = SparkContext(conf=conf)

    # Keep only entries whose release date falls inside the window.
    count = MmtfReader.readSequenceFile(path, sc) \
                      .filter(releaseDate("2000-01-28", "2017-02-28")) \
                      .count()

    print("Number of structure released between 2000-01-28 and 2017-02-28 is : " +
          str(count))

    sc.stop()
def main():
    """Filter PDB entries via a PDBj Mine keyword search, demonstrating
    both the SQL-string and the dataset-based PdbjMine filter variants,
    and print the matching entry count for each.
    """
    path = "../../resources/mmtf_full_sample/"

    # Local Spark context over all cores.
    conf = SparkConf() \
        .setMaster("local[*]") \
        .setAppName("keywordSearch")
    sc = SparkContext(conf=conf)

    pdb = MmtfReader.readSequenceFile(path, sc)
    sql = "select pdbid, resolution, biol_species, db_uniprot, db_pfam, hit_score from keyword_search('porin') order by hit_score desc"

    # Variant 1: filter directly with the SQL string.
    sql_filter = PdbjMine(sql)
    sql_count = pdb.filter(sql_filter).keys().count()
    print(f"Number of entries using sql to filter: {sql_count}")

    # Variant 2: fetch the query result as a dataset first, then filter.
    dataset = PdbjMineService.getDataset(sql)
    dataset.show(10)
    dataset_filter = PdbjMine(dataset=dataset)
    dataset_count = pdb.filter(dataset_filter).keys().count()
    print(f"Number of entries using dataset to filter: {dataset_count}")

    sc.stop()
def main():
    """Download structure 1STP, keep its L-protein chains, extract
    secondary-structure elements of type "E" (min length 6), display them,
    and report the elapsed wall-clock time.
    """
    start = time.time()

    # Local Spark context over all cores.
    conf = SparkConf() \
        .setMaster("local[*]") \
        .setAppName("secondaryStructureElementDemo")
    sc = SparkContext(conf=conf)

    # Fetch the structure and cache it, since it is traversed again below.
    pdb = MmtfReader.downloadMmtfFiles(["1STP"], sc).cache()

    # Split into polymer chains and keep only L-protein chains.
    chains = pdb.flatMap(structureToPolymerChains())
    protein_chains = chains.filter(containsLProteinChain())

    # Extract "E" (extended/strand) elements of at least length 6.
    elements = secondaryStructureElementExtractor.getDataset(
        protein_chains, "E", 6)
    elements.show(50, False)

    elapsed = time.time() - start
    print("Time: %f  sec." % elapsed)

    sc.stop()