class TestHTTPSExecute(unittest.TestCase): """ Class to test https execute method. """ def setUp(self): """ Define some rql and create a connection. """ # Set logging level logging.basicConfig(level=logging.DEBUG) # Ask for url & login information https_url = raw_input( "\nEnter the http url [default: https://imagen2.cea.fr/database/]: ") if not https_url: https_url = "https://imagen2.cea.fr/database/" login = raw_input("\nEnter the login: "******"Enter the password: "******"Any C, G Where X is Subject, X code_in_study C, " "X handedness 'ambidextrous', X gender G") # HTTP test self.connection = CWInstanceConnection(https_url, login, password, realm="Imagen") def test_execute(self): """ Method to test if we can interogate the server from the script. """ rset = self.connection.execute(self.rql, export_type="json") self.assertTrue(len(rset) > 0)
class TestHTTPExecuteTwisted(unittest.TestCase): """ Class to test http execute method with twisted server. """ def setUp(self): """ Define some rql and create a connection. """ # Set logging level logging.basicConfig(level=logging.DEBUG) # Ask for url & login information http_url = raw_input( "\nEnter the http url [default: http://localhost:8080/]: ") if not http_url: http_url = "http://localhost:8080/" login = raw_input("\nEnter the login [default: admin]: ") if not login: login = "******" password = getpass.getpass("Enter the password [default: a]: ") if not password: password = "******" # Create dummy rqls self.rql1 = ("Any C, G Where X is Subject, X code_in_study C, " "X handedness 'ambidextrous', X gender G") self.rql2 = ("Any S WHERE S is Subject") self.rql3 = ("Any S WHERE S is Scan, S has_data A, A field '3T', " "S in_assessment B, B timepoint 'V0', B concerns D, " "D code_in_study 'subject1'") # HTTP test self.connection = CWInstanceConnection(http_url, login, password, port=9191) def test_execute(self): """ Method to test if we can interogate the server from the script. """ rset = self.connection.execute(self.rql1, export_type="json") self.assertTrue(len(rset) > 0) def test_execute_with_sync(self): """ Method to test if we can create/download a search from the script. """ # Check twisted server rset = self.connection.execute_with_sync(self.rql3, "/tmp/sync_twisted", timer=1) for item in rset: self.assertTrue(os.path.isfile(item[0])) rset = self.connection.execute_with_sync(self.rql2, "/tmp/sync_twisted", timer=1)
def get_snps_of_gene(gene_name, metagen_connection=None,
                     metagen_url=DEFAULT_METAGEN_URL, timeout=10, nb_tries=3):
    """ Request from the Metagen server the snps attached to a gene.

    An existing Metagen connection can be given, otherwise an anonymous one
    is opened on 'metagen_url'.

    Parameters
    ----------
    gene_name: str
        Gene HGNC name.
    metagen_connection: CWInstanceConnection, default None
        A connection to the Metagen instance. Created if not passed.
    metagen_url: str, default module url
        Url of the Metagen server. Ignored if a connection to the Metagen
        server is passed.
    timeout: int, default 10
        Forwarded to the connection 'execute' call: max time in seconds to
        wait for a response from Metagen.
    nb_tries: int, default 3
        Forwarded to the connection 'execute' call: if the server failed to
        answer, retry nb_tries-1 times.

    Return
    ------
    snps: list of namedtuple
        One Snp(rs_id, chromosome, bp_pos) per row of the result set.
    """
    # Record type of the returned items
    snp_record = namedtuple("Snp", ["rs_id", "chromosome", "bp_pos"])

    # Open an anonymous connection when none is provided
    connection = metagen_connection
    if connection is None:
        connection = CWInstanceConnection(metagen_url, "anon", "anon")

    # The gene name is injected in the request
    rql_template = ("Any SID, CN, POS Where G is Gene, G hgnc_id '%s', "
                    "G gene_snps S, S rs_id SID, S snp_chromosome C, "
                    "C name CN, S start_position POS")
    rows = connection.execute(rql_template % gene_name, timeout=timeout,
                              nb_tries=nb_tries)

    # Wrap each row in a namedtuple to simplify usage
    snps = []
    for identifier, chromosome, position in rows:
        snps.append(snp_record(identifier, chromosome, position))
    return snps
def get_genes(metagen_connection=None, metagen_url=DEFAULT_METAGEN_URL,
              timeout=10, nb_tries=3):
    """ Request from the Metagen server every gene name with its chromosome.

    An existing Metagen connection can be given, otherwise an anonymous one
    is opened on 'metagen_url'.

    Parameters
    ----------
    metagen_connection: CWInstanceConnection, default None
        A connection to the Metagen instance. Created if not passed.
    metagen_url: str, default module url
        Url of the Metagen server. Ignored if a connection to the Metagen
        server is passed.
    timeout: int, default 10
        Forwarded to the connection 'execute' call: max time in seconds to
        wait for a response from Metagen.
    nb_tries: int, default 3
        Forwarded to the connection 'execute' call: if the server failed to
        answer, retry nb_tries-1 times.

    Return
    ------
    genes: list of namedtuple
        One Gene(hgnc_id, chromosome) per row of the result set.
    """
    # Record type of the returned items
    gene_record = namedtuple("Gene", ["hgnc_id", "chromosome"])

    # Open an anonymous connection when none is provided
    connection = metagen_connection
    if connection is None:
        connection = CWInstanceConnection(metagen_url, "anon", "anon")

    query = ("Any GN, CN Where G is Gene, G hgnc_id GN, G gene_chromosome C, "
             "C name CN")
    rows = connection.execute(query, timeout=timeout, nb_tries=nb_tries)

    # Wrap each row in a namedtuple to simplify usage
    genes = []
    for hgnc_name, chromosome in rows:
        genes.append(gene_record(hgnc_name, chromosome))
    return genes
def metagen_get_meta_of_snps(snp_ids, metagen_connection=None,
                             metagen_url=DEFAULT_METAGEN_URL, timeout=10,
                             nb_tries=3):
    """ Request from the Metagen server the metadata of a list of snps:
    chromosome, basepair position and related genes.

    An existing Metagen connection can be given, otherwise an anonymous one
    is opened on 'metagen_url'.

    Parameters
    ----------
    snp_ids: list of str
        List of snp rs ids. Duplicates are dropped before requesting.
    metagen_connection: CWInstanceConnection, default None
        A connection to the Metagen instance. Created if not passed.
    metagen_url: str, default module url
        Url of the Metagen server. Ignored if a connection to the Metagen
        server is passed.
    timeout: int, default 10
        Forwarded to the connection 'execute' calls: max time in seconds to
        wait for a response from Metagen.
    nb_tries: int, default 3
        Forwarded to the connection 'execute' calls: if the server failed to
        answer, retry nb_tries-1 times.

    Return
    ------
    meta_of_snp: dict
        Map <rs id> -> namedtuple(<chromosome>, <bp_pos>, <genes>), where
        <genes> is a set of gene names.
    """
    # Open an anonymous connection when none is provided
    if metagen_connection is None:
        metagen_connection = CWInstanceConnection(metagen_url, "anon", "anon")

    # Request each rs id only once
    unique_ids = list(set(snp_ids))

    metadata_record = namedtuple("Snp", ["chromosome", "bp_pos", "genes"])
    meta_of_snp = {}

    # The variants are requested by batches (one by one would take too long
    # when there are many variants). Each batch needs two requests, one for
    # chromosome/position and one for the genes, because not all snps are
    # associated to genes.
    batch_size = 5000
    for start in xrange(0, len(unique_ids), batch_size):
        # The ids are joined by hand rather than with str(tuple()): with a
        # single id, str(tuple()) leaves a trailing comma that RQL rejects.
        quoted_ids = "' ,'".join(unique_ids[start: start + batch_size])

        # Step 1: chromosome and position, genes start as an empty set
        position_rql = (
            "Any SID, CN, POS WHERE S is Snp, S rs_id IN ('%s'), "
            "S rs_id SID, S chromosomes C, C name CN, "
            "S start_position POS") % quoted_ids
        position_rows = metagen_connection.execute(
            position_rql, timeout=timeout, nb_tries=nb_tries)
        for rs_id, chromosome, position in position_rows:
            meta_of_snp[rs_id] = metadata_record(chromosome, position, set())

        # Step 2: gene-snp association
        gene_rql = (
            "Any SID, GN WHERE S is Snp, S rs_id IN ('%s'), S rs_id SID, "
            "S genes G, G name GN") % quoted_ids
        gene_rows = metagen_connection.execute(
            gene_rql, timeout=timeout, nb_tries=nb_tries)
        for rs_id, gene_name in gene_rows:
            meta_of_snp[rs_id].genes.add(gene_name)

    return meta_of_snp
# Script fragment: fetch questionnaire data with an rql request, keep the rows
# that carry a 'sstartdate' entry and dump them to a tab-separated file.
#
# NOTE(review): the statement below contains `******` markers that look like
# redaction residue. The code that reads the credentials, creates `connect`
# and opens the `rql` string is not visible here, so the line is left
# untouched - restore it from the original script before running.
if (login is None) or (password is None): print 'login: '******'password: '******'DAWBA-dawba-youth', " "QR in_assessment A, A timepoint 'BL', QR file F, " "F data D, QR subject S, S code_in_study ID, " "S handedness HAND, S gender SEX")
# execute
rset = connect.execute(rql, export_type="json")
# parse the result with specific attention for the last field: rows whose
# last item has no 'sstartdate' key are dropped, the others get that value
# converted to int
loaded_rset = [(sid, handednesss, sex, int(sdata[u'sstartdate'])) for sid, handednesss, sex, sdata in rset if u'sstartdate' in sdata.keys()]
# the int-converted 'sstartdate' value ends up in the 'Age' column
demog = pd.DataFrame(loaded_rset, columns=['IID', 'Handedness', 'Gender', 'Age'])
print demog.head()
# save the dataframe in a csv (tab is used as the field separator)
outf = '/tmp/demog.csv'
print('Demo output file : see file : in {0}'.format(outf))
with open(outf, 'w') as fp:
    demog.to_csv(fp, sep='\t', index=False)