Example #1
0
 def _fetch_pcassay(self, args):
     """PubChem BioAssay cannot be obtained via efetch; fetch the assay
     description XML from the PubChem web site one id at a time.

     args: dict with either 'id' (comma-separated str or list of ids) or
         WebEnv/query_key entries usable by self.esearch().
     Returns: list of XML strings, one per assay id, each wrapped in a
         <dummy_tag> so that multiple records can later be concatenated.
     """
     if 'id' in args:
         s_id=args['id']
     else:
         # retrieve id list from WebEnv+query_key
         s_id=self.esearch(args)
     if type(s_id) is str:
         s_id=s_id.split(",")
     out=[]
     for id in s_id:
         url="https://pubchem.ncbi.nlm.nih.gov/assay/assay.cgi?aid=%s&version=1.2&q=expdesc_xmldisplay" % str(id)
         trial=1
         while True:
             self._check_interval()
             r = requests.get(url)
             self._flag_check()
             if not r.ok:
                 # BUG FIX: was "trail" (NameError) and "{r.error}"
                 # (requests.Response has no .error attribute)
                 if trial==self._giveup:
                     util.error_msg('{r.reason} ({r.status_code}): {error}'.format(
                         r=r, error=r.text))
                 else:
                     time.sleep(10)  # back off before retrying
             else:
                 # strip newlines, then wrap the record so several records
                 # can later be concatenated under one root element
                 x=re.sub(r'<PC-AssayDescription\s.*?>', '<dummy_tag><PC-AssayDescription>', re.sub(r'\n', '', r.content))
                 out.append(x+'</dummy_tag>')
                 break
             trial+=1
     return out
Example #2
0
def get_con(s_name, auth_path=None, db=None):
    """Open a database connection for the db.csv entry named s_name.

    For MySQL, one can specify a default database to connect to via db.
    """
    if auth_path is None:
        auth_path = os.path.dirname(os.path.abspath(__file__)) + '/db.csv'
    t_db = pd.read_csv(util.format_path(auth_path))
    t_db.fillna('', inplace=True)
    t_db = t_db[t_db['ID'] == s_name]
    if len(t_db) == 0:
        util.error_msg('Database %s is not defined!' % s_name)
    one = t_db.iloc[0]
    engine = one['TYPE']
    con = None
    if engine == 'MYSQL':
        import MySQLdb as mysql
        con = mysql.connect(one['HOST'], one['USR'], one['PWD'], db or one['DB'])
    elif engine == 'POSTGRES':
        # requires the postgresql client libraries on LD_LIBRARY_PATH
        import pgdb
        con = pgdb.connect(one['CONNECT'])
    elif engine == 'ORACLE':
        import cx_Oracle as oracle
        con = oracle.connect(one['CONNECT'])
    else:
        util.error_msg('Unsupported database engine: %s' % engine)
    return con
Example #3
0
 def _fetch_pccompound(self, args):
     """PubChem Compound cannot be obtained via efetch; fetch the PUG-View
     record XML from the PubChem REST service one id at a time.

     args: dict with either 'id' (comma-separated str or list of ids) or
         WebEnv/query_key entries usable by self.esearch().
     Returns: list of XML strings, one per compound id, each wrapped in a
         <dummy_tag> so that multiple records can later be concatenated.
     """
     if 'id' in args:
         s_id=args['id']
     else:
         # retrieve id list from WebEnv+query_key
         s_id=self.esearch(args)
     if type(s_id) is str:
         s_id=s_id.split(",")
     out=[]
     for id in s_id:
         url="https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/%s/XML/?response_type=display" % str(id)
         trial=1
         while True:
             self._check_interval()
             r = requests.get(url)
             self._flag_check()
             if not r.ok:
                 # BUG FIX: was "trail" (NameError) and "{r.error}"
                 # (requests.Response has no .error attribute)
                 if trial==self._giveup:
                     util.error_msg('{r.reason} ({r.status_code}): {error}'.format(
                         r=r, error=r.text))
                 else:
                     time.sleep(10)  # back off before retrying
             else:
                 # strip newlines, then wrap the record so several records
                 # can later be concatenated under one root element
                 x=re.sub(r'<Record\s.*?>', '<dummy_tag><Record>', re.sub(r'\n', '', r.content))
                 out.append(x+'</dummy_tag>')
                 break
             trial+=1
     return out
Example #4
0
def sql_in_old1(s_sql_left,
                s_sql_right,
                S_id,
                num=1000,
                con=None,
                params_before=None,
                params_after=None):
    """Run an IN-list query in batches of at most num ids and concatenate
    the per-batch results into one DataFrame."""
    if con is None:
        util.error_msg('Database connection is not provided!')
    S_id = list(S_id)
    # the SQL may carry its own bind parameters before/after the id list
    pre = params_before or []
    post = params_after or []
    frames = []
    total = len(S_id)
    for start in range(0, total, num):
        stop = min(total, start + num)
        placeholders = ",".join("?" * (stop - start))
        frames.append(from_sql(con,
                               s_sql_left + placeholders + s_sql_right,
                               params=pre + S_id[start:stop] + post))
    return pd.concat(frames, axis=0, ignore_index=True)
Example #5
0
def make_icon(s_png, width=100, height=100, s_out=None):
    """Create a width x height thumbnail of s_png on a white background.

    The image is scaled to fit (aspect ratio preserved) and centered.
    width/height: canvas size in pixels; one of them may be None, in
        which case that dimension follows the scaled image.
    s_out: output file name; defaults to <s_png stem>_thumbnail.png
    """
    import scipy.misc
    if width is None and height is None:
        util.error_msg('At least one must not be None: width and height')
    M = scipy.misc.imread(s_png)
    h, w, z = M.shape
    if width is None:
        scale = height * 1.0 / h
    elif height is None:
        scale = width * 1.0 / w
    else:
        # fit within both bounds
        scale = min(height * 1.0 / h, width * 1.0 / w)
    h = int(h * scale + 0.5)
    w = int(w * scale + 0.5)
    # BUG FIX: if one dimension was None the canvas below was built with
    # None; size the canvas to the scaled image instead
    if width is None:
        width = w
    if height is None:
        height = h
    Mr = scipy.misc.imresize(M, (h, w))
    h2, w2, z2 = Mr.shape
    # white canvas; 'u1' = uint8 (was the deprecated alias '1u1')
    M2 = np.ones([height, width, 3], dtype='u1') * 255
    # BUG FIX: integer division so offsets are valid indices under Python 3
    offset0 = (width - w2) // 2
    offset1 = (height - h2) // 2
    for ch in range(3):
        M2[offset1:offset1 + h2, offset0:offset0 + w2, ch] = Mr[:, :, ch]
    if s_out is None:
        s_out, s_ext = os.path.splitext(s_png)
        s_out += "_thumbnail.png"
    scipy.misc.imsave(s_out, M2)
Example #6
0
 def __init__(self, BIN=None, TEMPLATE=None):
     """Configure the Circos binary and configuration-template paths,
     erroring out if either file is missing."""
     Circos.DEFAULT_TEMPLATE=os.path.join(os.path.dirname(__file__), "circos", "circos.conf.template")
     self.BIN=Circos.DEFAULT_BIN if BIN is None else BIN
     self.TEMPLATE=Circos.DEFAULT_TEMPLATE if TEMPLATE is None else TEMPLATE
     if not os.path.exists(self.BIN):
         util.error_msg("Circos tool " + self.BIN + " does not exist!")
     if not os.path.exists(self.TEMPLATE):
         util.error_msg("Circos template " + self.TEMPLATE + " does not exist!")
Example #7
0
 def _add_attr(self, objs, property_name, S_value):
     """Used by to_list()"""
     if len(objs) != len(S_value):
         util.error_msg(
             'Attribute: %s, length of Objects and S_value do not match: %d vs %d!'
             % (property_name, len(objs), len(S_value)))
     for i, x in enumerate(objs):
         objs[i][property_name] = S_value[i]
Example #8
0
def search(query=None):
    """
    Search Google Play Music for a given query.

    Keyword arguments:
    query=None: The search query.

    Returns: A dict of lists with keys 'songs', 'artists', and 'albums'.
    """
    global content

    if query is None:  # No argument.
        error_msg(outbar, 'Missing search query.')
        return

    # Fetch as many results as we can display depending on terminal height.
    limit = int((main.getmaxyx()[0] - 3) / 3)
    addstr(outbar, 'Searching for \'%s\'...' % query)
    result = api.search(query, max_results=limit)

    outbar.erase()  # Remove trailing output.
    outbar.refresh()

    # Per category: (key of the hit list in the search result,
    # per-entry key holding the item payload, MusicObject class)
    mapping = {
        'songs': ('song_hits', 'track', Song),
        'artists': ('artist_hits', 'artist', Artist),
        'albums': ('album_hits', 'album', Album),
    }
    content = {'songs': [], 'artists': [], 'albums': []}
    iters = {k: iter(result[hits]) for k, (hits, _, _) in mapping.items()}

    # Create 'limit' of each type.
    for _ in range(limit):
        for k, (_, key, cls) in mapping.items():
            try:
                content[k].append(cls(next(iters[k])[key]))
            except StopIteration:
                pass

    return content
Example #9
0
 def wrapper(self, args):
     """Internal default wrapper function to be called if start(f=None).

     args: a sequence whose first element is either a callable or the
     name of a registered function; the rest are its arguments.
     """
     name, rest = args[0], args[1:]
     if callable(name):
         return name(*rest)
     if name not in self.registry:
         util.error_msg('Function %s were not registered!' % name)
     return self.registry[name](*rest)
Example #10
0
def get_con_info(s_name, auth_path=None):
    """Return the db.csv row (a Series) describing connection s_name,
    or None if the name is not defined."""
    if auth_path is None:
        auth_path = os.path.dirname(os.path.abspath(__file__)) + '/db.csv'
    t_db = pd.read_csv(util.format_path(auth_path))
    t_db.fillna('', inplace=True)
    hits = t_db[t_db['ID'] == s_name]
    if len(hits) == 0:
        util.error_msg('Database %s is not defined!' % s_name)
        return None
    return hits.iloc[0]
Example #11
0
def add_column(s_file, R, s_name, l_separator=True):
    """Add an extra column using value R array to an existing heat map.
    s_file: str, file name without extension, it will modify .cdt and .atr
    R: array(int/float), values to add
    s_name: str, column name
    l_separator: bool, default True. If True, add a column of blank value to separate the new column from existing ones."""
    # strip a 3-letter extension (e.g. ".cdt") if the caller supplied one
    if re.search('\.\w{3}$', s_file):
        s_file = s_file[:-4]
    if not os.path.exists(s_file + '.cdt'):
        util.error_msg("File not exist: " + s_file + ".cdt!")
    f = open(s_file + '.cdt')
    S = []  # accumulated rows, each a list of tab-separated fields
    cnt = 0  # index into R, advanced once per GENE data row

    while True:
        line = f.readline()
        if not line: break
        SS = line.strip().split("\t")
        if SS[0].startswith('GENE'):
            # data row: append (separator and) the new value
            if l_separator:
                SS.append('')
            SS.append('%.2f' % R[cnt])
            cnt += 1
        elif SS[0] == 'GID':
            # header row: append (separator and) the new column name
            if l_separator:
                SS.append('separator')
            SS.append(s_name)
        elif SS[0] == 'AID':
            # array-id row: mint new ARRY ids following the current maximum
            X = [int(re.sub(r'\D', '', x)) for x in SS if x.startswith('ARRY')]
            n_array = max(X) + 1
            SS.append('ARRY%dX' % n_array)
            if l_separator:
                SS.append('ARRY%dX' % (n_array + 1))
        elif SS[0] == 'EWEIGHT':
            # new column(s) get zero experiment weight
            if l_separator:
                SS.append('0')
            SS.append('0')
        S.append(SS)
    f.close()
    S = ["\t".join(X) for X in S]
    util.save_list(s_file + '.cdt', S, s_end="\n")

    # keep the array-clustering tree (.atr) consistent by linking the new
    # column(s) under fresh NODE ids.
    # NOTE(review): n_array is only bound if the .cdt contained an AID row;
    # this branch would raise NameError otherwise - confirm an AID row is
    # always present whenever an .atr file exists.
    if os.path.exists(s_file + '.atr'):
        S = util.read_list(s_file + '.atr')
        SS = S[-1].split("\t")
        n_node = int(re.sub(r'\D', '', SS[0])) + 1
        S.append('NODE%dX\tNODE%dX\tARRY%dX\t0' %
                 (n_node, n_node - 1, n_array))
        if l_separator:
            S.append('NODE%dX\tNODE%dX\tARRY%dX\t0' %
                     (n_node + 1, n_node, n_array + 1))
        util.save_list(s_file + '.atr', S, s_end="\n")
 def tissue_specific(self):
     """Fetch the TIGER RefSeq-to-tissue table from JHU and return it as a
     DataFrame with columns RefSeq, Tissue(s).

     Reference: http://www.biomedcentral.com/1471-2105/9/271
     """
     url='http://bioinfo.wilmer.jhu.edu/tiger/download/ref2tissue-Table.txt'
     r = requests.post(url)
     if not r.ok:
         # BUG FIX: the message was passed as two arguments so the '%s'
         # placeholder was never formatted
         util.error_msg('Cannot fetch tissue specific data from JHU: %s' % r.text)
     S=r.content.split('\n')
     # source is tab-delimited: the first tab becomes the CSV comma, any
     # remaining tabs (multiple tissues) collapse into spaces
     for i,s in enumerate(S):
         S[i]=s.replace('\t', ',', 1).replace('\t', ' ')
     import cStringIO
     return pd.read_csv(cStringIO.StringIO("\n".join(S)))
Example #13
0
    def _fetch(self, path, args={}):
        """return results for a NCBI query, possibly from the cache

        :param: path: relative query path (e.g., 'einfo.fcgi')
        :param: args: dictionary of query args
        :rtype: xml string

        The args are joined with args required by NCBI (tool and email
        address) and with the default args declared when instantiating
        the client.
        """

        url = EUtils.url_base + path
        if type(args) is dict:
            args = dict(self.def_args.items() + args.items())
        trial = 1
        while True:
            if self.debug:
                print "trial %d" % trial
            # else args is str, pass as it is
            req_int = self.request_interval()
            sleep_time = req_int - (time.clock() - self._last_request_clock)
            #print "Sleep: ", sleep_time
            if sleep_time > 0:
                if self.debug:
                    print "sleep_time %d" % sleep_time
                time.sleep(sleep_time)
            r = requests.post(url, args)
            self._last_request_clock = time.clock()
            self._request_count += 1
            if self.debug:
                print r.text

            if not r.ok:
                if trial == self._giveup:
                    if any(bad_word in r.text
                           for bad_word in ['<error>', '<ERROR>']):
                        xml = ET.fromstring(r.text.encode('utf-8'))
                        util.error_msg(
                            '{r.reason} ({r.status_code}): {error}'.format(
                                r=r, error=xml.find('ERROR').text))
                    else:
                        util.error_msg(
                            '{r.reason} ({r.status_code}): {r.error}'.format(
                                r=r, error=r.text))
                else:
                    time.sleep(10)
            else:
                return r.content  #.encode('utf-8')
            trial += 1
Example #14
0
 def get_date(self):
     """Load the two most recent 'history' dates from the statistics table.

     Sets self.d1 (latest) and self.d2 (second latest, or None when only
     one entry exists); errors out if there is no history at all.
     """
     t=self.db.from_sql('select distinct history from statistics order by history desc limit 0,2')
     print t
     self.d1=self.d2=None
     if len(t)==0:
         util.error_msg('No history data is found')
     elif len(t)==1:
         util.warn_msg('Only one history entry')
         self.d1=str(t.ix[0, 'history'])
     else:
         # rows come back sorted descending: row 0 is the most recent
         self.d1=str(t.ix[0, 'history'])
         self.d2=str(t.ix[1, 'history'])
     print self.d1
     print self.d2
Example #15
0
def write(fn=None):
    """
    Write the current queue to a file.

    Keyword arguments:
    fn=None: File to be written to.
      File is stored at ~/.local/share/pmcli/playlists/.
    """
    if not queue:  # Can't save an empty queue.
        error_msg(outbar, 'Queue is empty.')
        return

    if fn is None:  # No argument.
        error_msg(outbar, 'Missing argument to write.')
        return

    path = join(expanduser('~'), '.local', 'share', 'pmcli', 'playlists')
    if not exists(path):  # No playlists directory.
        error_msg(outbar, 'Path to playlists does not exist.')

    elif exists(join(path, fn)):
        # BUG FIX: message read 'Playist'.
        error_msg(outbar, 'Playlist %s already exists.' % fn)

    else:  # Write the playlist.
        # the exists() check above guarantees a fresh file, so append
        # mode only ever creates it
        with open(join(path, fn), 'a') as f:
            json.dump(queue, f)
        addstr(outbar, 'Wrote queue to %s.' % fn)
Example #16
0
 def task_factory(xe):
     """Instantiate the task class named by XML element xe's tag.

     The class is looked up in globals() first; otherwise the module
     task_class.<tag lowercased> is imported from SyncDB.LIB_DIR() and
     the class is cached in globals() for subsequent calls.
     """
     class_name = xe.tag
     if class_name in globals():
         task_class = globals()[class_name]
     else:
         module_name = class_name.lower()
         if os.path.exists(
                 os.path.join(SyncDB.LIB_DIR(), module_name + ".py")):
             # level -1: Python 2 implicit relative import semantics
             m = __import__('task_class.' + module_name, globals(),
                            locals(), ['*'], -1)
             task_class = m.__dict__[class_name]
             globals()[class_name] = task_class
         else:
             # NOTE(review): if the module file is missing, task_class is
             # never bound and the return below raises NameError - confirm
             # util.error_msg() aborts execution before that point.
             print util.error_msg('Module not found: ' + class_name)
     return task_class(xe=xe)
Example #17
0
 def calc_node_info(self, node, degreeCutoff=None):
     """Compute MCODE node info (density, k-core level, neighbor list).

     node: node identifier in this network.
     degreeCutoff: if truthy, also score the node via NodeInfo.score_node().
     Returns a NodeInfo; when self.l_cache is set, results are memoized
     keyed by the MD5 of the node's neighborhood subnetwork.
     """
     k = self.degree(node)
     neighbors = self.neighbors(node)
     s_md5=""
     #print("::::",node, ":::", k, "::::")
     #sw=util.StopWatch()
     if (k < 2):
         # degree 0 or 1: trivial neighborhood, no k-core computation needed
         nodeInfo = NodeInfo()
         if (k == 1):
             nodeInfo.coreLevel = 1
             nodeInfo.coreDensity = 1.0
             nodeInfo.density = 1.0
             nodeInfo.numNodeNeighbors = len(neighbors); #########
             nodeInfo.nodeNeighbors = neighbors; ####
             # why ignore neighbor when k==1 in the original code???
     else:
         # induced subnetwork over the node plus its direct neighbors
         gpNodeNeighborhood = self.subnetwork(neighbors+[node])
         #sw.check('subnetwork')
         if (gpNodeNeighborhood.is_empty()):
             util.error_msg("In calc_node_info(): gpNodeNeighborhood was None.")
         #calculate the node information for each node
         if self.l_cache:
             # identical neighborhoods share one cached NodeInfo (cloned,
             # with only the neighbor list swapped in)
             s_md5=gpNodeNeighborhood.node_MD5()
             if s_md5 in self.cache_info:
                 #self.hit+=1
                 nodeInfo=self.cache_info[s_md5].clone()
                 nodeInfo.nodeNeighbors=neighbors
                 return nodeInfo
         nodeInfo = NodeInfo()
         #density
         nodeInfo.density = MCODE.calc_density(gpNodeNeighborhood, self.params['includeLoops'])
         #w.check('density')
         nodeInfo.numNodeNeighbors = len(neighbors)
         #calculate the highest k-core
         c = self.get_highest_KCore(gpNodeNeighborhood)
         #w.check('kcore')
         k = c['k']
         gpCore = c['network']
         nodeInfo.coreLevel = k
         if (gpCore is not None and not gpCore.is_empty()):
             nodeInfo.coreDensity = MCODE.calc_density(gpCore, self.params['includeLoops'])
         #w.check('cacl_density')
         #record neighbor array for later use in cluster detection step
         nodeInfo.nodeNeighbors = neighbors
     if degreeCutoff: nodeInfo.score_node(degreeCutoff)
     if self.l_cache:
         # NOTE(review): when k < 2, s_md5 stays "" so all low-degree nodes
         # share one cache slot - confirm this entry is never read back.
         self.cache_info[s_md5]=nodeInfo
     return nodeInfo
Example #18
0
 def parse_uniprot_kinase(self):
     """Scrape the UniProt pkinfam kinase-family document and write a CSV
     (self.fn_dest_kinase) with columns annotation_field1 (gene symbol),
     content (kinase group), gid and tax_id, keeping human genes only.
     """
     url = 'http://www.uniprot.org/docs/pkinfam'
     r = requests.post(url)
     if not r.ok:
         # NOTE(review): error_msg is given two arguments here; if it only
         # accepts one message string the '%s' is never formatted - confirm.
         util.error_msg('Cannot fetch kinase members from UniProt: %s',
                        r.text)
     S = r.content.split('\n')
     #s_file='pkinfam'
     #f=open(s_file)
     data = []
     n = len(S)
     i = 0
     while i < n:
         line = S[i]
         i += 1
         if re.search(r"^=+", line):
             # a '====' rule introduces a new kinase group block:
             # rule line, group name, closing rule, then the member list
             s_grp = S[i]
             i += 1
             s = S[i]
             i += 1
             # NOTE(review): this re-tests 'line' (always true here);
             # presumably the closing rule in 's' was meant - confirm.
             if not re.search(r"^=+", line):
                 util.error_msg("Parsing error, expecting: ====")
             s = S[i]
             i += 1
             # skip blank lines before the member list
             while re.search('^\W*$', s):
                 s = S[i]
                 i += 1
                 continue
             #if re.search('\w', s):
             #    util.error_msg("Parsing error, expecting a blank line")
             # collect members until the next blank line; keep only the
             # human entries (suffix _HUMAN) and their linked gene symbol
             while re.search('\w', s):
                 rslt = re.search('_HUMAN\s+\(<a.+>(\w+)<\/a>\s+\)', s)
                 if rslt is not None:
                     data.append({
                         'annotation_field1': rslt.groups()[0],
                         'content': s_grp
                     })
                 s = S[i]
                 i += 1
     t_kinase = pd.DataFrame(data)
     # map UniProt symbols to Entrez gene ids; drop unmapped (gid 0) rows
     t_kinase['gid'] = t_kinase.annotation_field1.apply(
         lambda x: self.uniprot2gid.get(x, 0))
     t_kinase['tax_id'] = '9606'
     t_kinase = t_kinase[t_kinase['gid'] > 0].copy()
     t_kinase.to_csv(os.path.join(SyncDB.DOWNLOAD_DIR(),
                                  self.fn_dest_kinase),
                     index=False)
     print "%d Kinase Proteins Fetched" % len(t_kinase)
Example #19
0
    def _batch_retrieve(self, action, args, count=0, func=None, retmax=10000):
        """Run an NCBI e-utility in retmax-sized batches.

        Returns a list of raw result strings, one per batch, each
        optionally post-processed by func. If args contains 'id', the ids
        are first uploaded via epost and retrieval proceeds through the
        returned WebEnv/query_key history token.
        count: total number of records; needed to know when to stop paging
            (it is recomputed from the ids when 'id' is supplied).
        """
        # action can be efetch or esummary
        # according to https://www.ncbi.nlm.nih.gov/books/NBK25499/
        # maximumly allowed retmax is 10000 for efetch and esummary
        retstart = 0
        if 'id' in args:
            # upload the id list once, then page through by history token
            count, s_id = self._format_ids(args['id'])
            args['id'] = s_id
            args = dict(self.def_args.items() + args.items())
            out = self.epost(args)
            del args['id']
            xml = ET.fromstring(out)
            args['WebEnv'] = xml.find('WebEnv').text
            args['query_key'] = xml.find('QueryKey').text
        else:
            if 'WebEnv' not in args and 'query_key' not in args:
                util.error_msg('Missing id, WebEnv, query_key!')
        #if action=='elink' and 'cmd' in args and args['cmd']=='neighbor_history':
        #    # not very meaningful for our use, as we separate id by id=&id=...,
        #    # it will return one query_key per input id
        #    del args['cmd']
        out = self._fetch(
            "/" + action + '.fcgi',
            dict(args.items() + [('retmax', retmax), ('retstart', 0)]))
        #if 'cmd' in args and args['cmd']=='neighbor_history':
        #    xml=ET.fromstring(out)
        #    webenv=xml.find('./LinkSet/WebEnv').text
        #    query_key=xml.find('./LinkSet/LinkSetDbHistory/QueryKey').text
        #    return {'WebEnv': webenv, 'query_key': query_key}

        if func is not None and callable(func):
            out = func(out)
        S_xml = [out]
        # if there are more entries than retmax, we need to make additional trip
        while (count > 0 and retstart + retmax < count):
            retstart += retmax
            print "Fetching batch: %d ..." % (retstart)
            out = self._fetch(
                "/" + action + '.fcgi',
                dict(args.items() +
                     [('retmax', retmax), ('retstart', retstart)]))
            if func is not None and callable(func):
                out = func(out)
            S_xml.append(out)
        return S_xml
Example #20
0
 def to_list(self, S_attr=None):
     """Return a list of dict, each dict represents an object, each object contains keys specified by S_attr.
     Generally, this is the only method we need to call to convert XML into a list of dict.

     S_attr: list of attribute-accessor names; None is treated as empty.
     """
     out = [{} for x in self.data]
     # ROBUSTNESS: the default S_attr=None used to raise TypeError
     for x in (S_attr or []):
         # BUG FIX: getattr() without a default raised AttributeError for
         # missing accessors instead of reaching the intended error_msg
         if getattr(self, x, None) is not None:
             S_value = getattr(self, x)()
             if type(S_value) is list:
                 # for gene description, it's a list
                 self._add_attr(out, x, S_value)
             elif type(S_value) is dict:
                 # a dict where each value is a list
                 # (for pubmed journal, it's a list of dict)
                 for k, v in S_value.items():
                     self._add_attr(out, x + '.' + k, v)
         else:
             util.error_msg('Attribute %s not implemented!' % x)
     return out
Example #21
0
def adjust_p(R_p, N=None, method="BH"):
    """Calculate FDR for multiple test. N is the total # of tests run, if not given, set to len(R_p)
    R_p: an array of p-values
    N: int, total number of tests run
    method: BONFERRONI, HOLM, or BH/FDR (Benjamini-Hochberg).
    Output has been valided with adjust.p in R"""
    l_old = False  # legacy per-row loop kept for reference; vectorized path is used
    n = len(R_p)
    if N is None:
        N = n
    m = method.upper()
    if m == "BONFERRONI":
        return np.clip(np.array(R_p) * N, 0.0, 1.0)
    if m == "HOLM":
        t = pd.DataFrame({'p': R_p, 'q': R_p, 'I': list(range(n))})
        t.sort_values('p', ascending=True, inplace=True)
        t.index = range(n)
        if l_old:
            q = 0.0
            for i in range(n):
                q = t.ix[i, 'q'] = min(max(q, t.ix[i, 'p'] * (N - i)), 1)
        else:
            # step-down: running maximum of p_(i)*(N-i), clipped to [0,1]
            t['q'] = np.maximum.accumulate(
                np.clip(t.p.values * (N - np.arange(n)), 0.0, 1.0))
        t.sort_values('I', inplace=True)
        return t.q.values
    if m in ("BH", "FDR"):
        t = pd.DataFrame({'p': R_p, 'q': R_p, 'I': list(range(n))})
        t.sort_values('p', ascending=False, inplace=True)
        t.index = range(n)
        if l_old:
            q = 1.0
            for i in range(n):
                q = t.ix[i, 'q'] = min(q,
                                       t.ix[i, 'p'] * N * 1.0 / (len(t) - i))
        else:
            # step-up: running minimum of p_(i)*N/rank, clipped to [0,1]
            t['q'] = np.minimum.accumulate(
                np.clip(t.p.values * N * 1.0 / (n - np.arange(n)), 0.0, 1.0))
        t.sort_values('I', inplace=True)
        return t.q.values
    util.error_msg('Unsupported method: %s' % method)
Example #22
0
    def make_input(self, s_file='untitled', options=None):
        """Write self.table to <s_file>.input in the clustering tool's
        Gene/Description/Weight format and remember the stem in self.input.

        s_file: output name; a trailing '.input' extension is stripped.
        options: overrides merged into self.input_opt (keys used: ID,
            DESCRIPTION, DATA_COLS, WEIGHT_COL, EXP_WEIGHT, GENE_WEIGHT,
            GENE_NORMALIZE, NORMALIZE_METHOD).
        Side effects: updates self.input_opt, may rewrite description
        columns and add a weight column to self.table.
        """
        if self.table is None: util.error_msg('Clustering.make_input: missing Clustering.table!')
        S=self.table.header()
        S_up=[ s.upper() for s in S]
        # merge caller overrides into the stored input options
        opt=self.input_opt
        opt.update(options or {})
        self.input_opt=opt
        S_miss=[s for s in opt['DATA_COLS'] if S.index(s)<0]
        if len(S_miss)>0: util.error_msg('Clustering.make_input: missing data column: '+", ".join(S_miss))
        # locate the ID column, falling back to a column named GENE
        i_id=util.index(opt['ID'], S)
        if (i_id<0):
            i_id=S_up.index('GENE')
            if i_id<0: util.error_msg('Clustering.make_input: no column is specified as the ID!')
            opt['ID']=S[i_id]
        if type(opt['DESCRIPTION']) is str: opt['DESCRIPTION']=[opt['DESCRIPTION']]
        I_des=[util.index(s, S) for s in opt['DESCRIPTION'] if util.index(s, S)>=0]

        if (len(I_des)==0):
            # no description column found: reuse the ID column
            I_des=[i_id]
            opt['DESCRIPTION']=[opt['ID']]
        else:
            for i in I_des:
                self.table.iloc[:, i]=util.sarray2sarray(self.table.iloc[:,i])
        i_w=util.index(opt['WEIGHT_COL'], S)
        opt['DATA_COLS']=self.get_default_exp_cols(opt['DATA_COLS'])
        n_exp=len(opt['DATA_COLS'])
        if n_exp==0: util.error_msg('Clustering.make_input: no data column is specified!')

        S_out=[]
        S_out.append('Gene\tDescription\tWeight\t'+'\t'.join(opt['DATA_COLS']))
        # experiment-weight row: all 1 unless EXP_WEIGHT matches column count
        if opt['EXP_WEIGHT'] is None or len(opt['EXP_WEIGHT'])!=n_exp:
            S_out.append('Exp\t\t'+'\t1'*n_exp)
        else:
            S_out.append('Exp\t\t\t'+'\t'.join(util.rarray2sarray(opt['EXP_WEIGHT'], s_format='%g', s_null=1.0)))
        #df.fillna('', inplace=True)
        i_cols=[S.index(s) for s in opt['DATA_COLS']]
        # attach per-gene weights as a (possibly new) table column.
        # NOTE(review): i_w was computed before this; if WEIGHT_COL is newly
        # created here, i_w stays -1 and the weight falls back to 1 - confirm.
        if opt['GENE_WEIGHT'] is not None and len(opt['GENE_WEIGHT'])==len(self.table):
            if opt['WEIGHT_COL']=='':
                opt['WEIGHT_COL']='WEIGHT'
            self.table[opt['WEIGHT_COL']]=opt['GENE_WEIGHT']
        for i in range(len(self.table)):
            # one row: id, ':'-joined descriptions, weight (default 1), data
            s=str(self.table.iloc[i, i_id])+'\t'+":".join(self.table.iloc[i, I_des])+'\t'+str(self.table.iloc[i, i_w] if i_w>=0 else 1)
            R=np.array([x for x in self.table.iloc[i,i_cols]])
            if opt['GENE_NORMALIZE'] and opt['NORMALIZE_METHOD']=='Z':
                # Z-score across the row; mean ignores NaNs
                valid=util.no_nan(R)
                if len(valid)>1:
                    R=(R-np.mean(valid))/np.std(R, ddof=1)
            s+='\t'+'\t'.join(['' if pd.isnull(x) else str(x) for x in R])
            S_out.append(s)
        if re.search(r'\.input$', s_file) is not None:
            s_file=re.sub(r'\.input$', '', s_file)
        util.save_list(s_file+".input", S_out, s_end='\n')
        self.input=s_file
Example #23
0
def sql_in(s_sql_left,
           s_sql_right,
           S_id,
           num=1000,
           con=None,
           params_before=None,
           params_after=None):
    """Run "<s_sql_left> ... <s_sql_right>" for a long id list S_id.

    s_sql_left must end with "<column> IN ("; the single IN clause is
    rewritten as OR-ed IN chunks of at most num ids (to stay within
    database placeholder limits) and executed as one query with all ids
    bound as '?' parameters.
    """
    if con is None:
        util.error_msg('Database connection is not provided!')
    S_id = list(S_id)
    n = len(S_id)
    # in case it's multi-line SQL statement
    s_sql_left = re.sub('[\r\n]', ' ', s_sql_left)
    s_sql_right = re.sub('[\r\n]', ' ', s_sql_right)
    # drop the right-hand side's closing ')' - it is re-added below
    s_sql_right = re.sub(r'^\s*\)', '', s_sql_right)
    # the left SQL must end with "<column> IN ("; capture the column name
    pat = re.compile(r'\s+([\w.]+)\s+IN\s*\(\s*$', re.I)
    m = re.search(pat, s_sql_left)
    if m is None:
        util.error_msg('Left SQL does not ends with IN statement: %s' %
                       s_sql_left)
    s_sql_left = s_sql_left[:m.start()] + " "
    s_col = m.groups()[0]

    # somethings SQL contains its own parameters, we need to provide parameter before/after the
    # S_id parameters
    params_before = params_before or []
    params_after = params_after or []
    # If oracle, either use ToTable() or run multiple SQL queries
    # If multiple SQL are run, results do not support GROUP BY, ORDER BY, DISTINCT
    # as they are applied to individual SQL runs
    # For other SQL servers, results will be exact, via multiple OR statements
    S = []
    for i in range(0, n, num):
        j = min(n, i + num)
        S.append(",".join(["?"] * (j - i)))
    # "(col IN (?,...,?) OR col IN (?,...,?) ...)"
    s_id = "(" + s_col + " IN (" + (") OR " + s_col + " IN (").join(S) + "))"
    if db_type(con) == 'MYSQL' and not S:
        # an empty id list would generate invalid SQL; match nothing instead
        s_id = "(" + s_col + " IN (''))"
    t = from_sql(con,
                 s_sql_left + s_id + s_sql_right,
                 params=params_before + S_id + params_after)
    return t
Example #24
0
def insert_pct(pct_slide, s_img, top=None):
    """My special slide layout, a title and one big picture.

    Scales the image s_img to fit a fixed picture area (sizes in EMU),
    preserving aspect ratio, and centers it on pct_slide.
    """
    (left, width, height) = (296260, 8551480, 5078313)
    if top is None:
        top = 1347965 + 25 * 914400 // 72  # 25pt below the default title
    center_x = left + width // 2
    center_y = top + height // 2
    ratio0 = width * 1.0 / height
    if not os.path.exists(s_img):
        # BUG FIX: placeholder was '$s', which made the '%' operator raise
        # "not all arguments converted" instead of reporting the file name
        util.error_msg('File %s not exist!' % s_img)
    M = scipy.misc.imread(s_img)
    h, w, z = M.shape
    ratio = w * 1.0 / h
    if ratio > ratio0:
        # image is wider than the box: use full width, shrink height
        height = int(width / ratio)
        top = center_y - height // 2
    else:
        # image is taller than the box: use full height, shrink width
        width = int(height * ratio)
        left = center_x - width // 2
    pct_slide.shapes.add_picture(s_img, left, top, width, height)
Example #25
0
def read_cdt(s_file):
    """Read a clustered data table (.cdt) file.

    Returns (t, R_exp, R_gene): a DataFrame of the expression matrix
    (prefixed with GENE/NAME columns when present), the EWEIGHT row as a
    numeric array, and the per-gene GWEIGHT values.
    """
    if not s_file.endswith('.cdt'):
        s_file += '.cdt'
    if not os.path.exists(s_file):
        util.error_msg("File not exist: " + s_file + "!")
    f = open(s_file)
    S_header = f.readline().strip().split("\t")
    i_w = util.index("GWEIGHT", S_header)
    i_gene = util.index('GENE', S_header)
    i_name = util.index('NAME', S_header)
    l_start = False  # data rows begin only after the EWEIGHT row is seen
    R_exp = []
    R_gene = []
    data = []
    offset = 0  # column index where numeric expression data starts
    while True:
        line = f.readline()
        if not line: break
        S = line.strip().split("\t")
        if S[0] == 'EWEIGHT':
            # the first non-empty cell marks where expression columns begin
            for i in range(1, len(S)):
                if S[i] != "":
                    offset = i
                    break
            # keep only GENE/NAME plus the expression column headers
            tmp = []
            if i_gene >= 0: tmp.append(S_header[i_gene])
            if i_name >= 0: tmp.append(S_header[i_name])
            S_header = tmp + S_header[offset:]
            R_exp = util.sarray2rarray(S[offset:])
            # no GWEIGHT column: fall back to the column before the data
            if i_w < 0: i_w = offset - 1
            l_start = True
        elif l_start:
            one = []
            if i_gene >= 0: one.append(S[i_gene])
            if i_name >= 0: one.append(S[i_name])
            one.extend(util.sarray2rarray(S[offset:]))
            data.append(one)
            R_gene.append(float(S[i_w]))
    f.close()
    t = pd.DataFrame(data, columns=S_header)
    return (t, R_exp, R_gene)
Example #26
0
def transition(input):
    """
    Route input to the appropriate function.

    Arguments:
    input: User input.
    """
    # command aliases: single letter and full word share one handler
    commands = {
        'h': help, 'help': help,
        'e': expand, 'expand': expand,
        's': search, 'search': search,
        'p': play, 'play': play,
        'q': enqueue, 'queue': enqueue,
        'w': write, 'write': write,
        'r': restore, 'restore': restore,
    }

    if content is None:
        addstr(infobar, 'Now playing: None')

    # split off a single argument if one was given
    try:
        command, arg = input.split(maxsplit=1)
    except ValueError:
        command, arg = input, None

    if command not in commands:
        error_msg(outbar, 'Nonexistent command.')
        return
    commands[command](arg)
    if content is not None:
        display()
Example #27
0
def table_exists(con, s_table, s_db=""):
    """Return True if table s_table exists in the database behind con.

    s_db: optional schema name (only used for MySQL).
    """
    engine = db_type(con)
    if engine == 'MYSQL':
        sql = "select count(*) from information_schema.tables where table_name=?"
        params = [s_table]
        if s_db:
            sql += " and table_schema=?"
            params.append(s_db)
    elif engine == 'ORACLE':
        sql = "select count(*) from user_tables where table_name=upper(?)"
        params = [s_table]
    elif engine == 'SQLITE':
        sql = "select count(*) from sqlite_master where type='table' and name=?"
        params = [s_table]
    else:
        util.error_msg('Unsupported database engine!')
    t = from_sql(con, sql, params)
    return t.ix[0, 0] > 0
Example #28
0
def sql_in_old2(s_sql_left,
                s_sql_right,
                S_id,
                num=1000,
                con=None,
                params_before=None,
                params_after=None):
    """Run a SQL query whose IN clause binds a potentially huge ID list.

    Databases cap the number of bind parameters per statement, so the IDs
    are broken into chunks of at most `num` and joined as
    "(col IN (...) OR col IN (...) ...)".

    Arguments:
    s_sql_left: SQL text up to and including "col IN (".
    s_sql_right: SQL text after the closing ")" of the IN clause.
    S_id: iterable of IDs to bind.
    num: maximum number of IDs per IN chunk.
    con: open database connection (required).
    params_before, params_after: bind parameters that appear before/after
        the S_id parameters in the final statement.
    """
    if con is None:
        util.error_msg('Database connection is not provided!')
    S_id = list(S_id)
    n = len(S_id)
    # in case it's a multi-line SQL statement
    s_sql_left = re.sub(r'[\r\n]', ' ', s_sql_left)
    s_sql_right = re.sub(r'[\r\n]', ' ', s_sql_right)
    s_sql_right = re.sub(r'^\s*\)', '', s_sql_right)
    pat = re.compile(r'\s+([\w.]+)\s+IN\s*\(\s*$', re.I)
    m = re.search(pat, s_sql_left)
    if m is None:
        util.error_msg('Left SQL does not end with IN statement: %s' %
                       s_sql_left)
    s_sql_left = s_sql_left[:m.start()] + " "

    s_col = m.groups()[0]
    # sometimes the SQL contains its own parameters; callers supply them
    # before/after the S_id parameters
    params_before = params_before or []
    params_after = params_after or []
    S = []
    for i in range(0, n, num):
        j = min(n, i + num)
        S.append(",".join(["?"] * (j - i)))
    s_id = "(" + s_col + " IN (" + (") OR " + s_col + " IN (").join(S) + "))"
    t = from_sql(con,
                 s_sql_left + s_id + s_sql_right,
                 params=params_before + S_id + params_after)
    return t
Example #29
0
 def __init__(self, input='', table=None, input_options=None, cluster_options=None, user_hybrid = None):
     """Set up a clustering run from an input file prefix and/or a table.

     Arguments:
     input: input file name; an optional trailing '.input' is stripped.
     table: data table; its ID column is cast to str when needed.
     input_options: overrides for Clustering.DEFAULT_INPUT_OPT.
     cluster_options: overrides for Clustering.DEFAULT_CLUSTER_OPT.
     user_hybrid: optional custom clustering command line; its last token
         must be an existing executable path.
     """
     if user_hybrid is not None:
         user_hybrid_path = user_hybrid.split()[-1]
         if os.path.exists(user_hybrid_path):
             Clustering.BIN_HYB=user_hybrid
         else:
             util.error_msg("Clustering tool " + user_hybrid_path + " does not exist!")
     self.input_opt={}
     self.cluster_opt={}
     self.input=''
     self.table=None
     if input!='':
         if re.search(r'\.input$', input) is not None:
             # BUG FIX: original referenced undefined name s_input here,
             # raising NameError whenever input ended in ".input".
             self.input=re.sub(r'\.input$', '', input)
         else:
             self.input=input
     input_options = input_options or {}
     cluster_options = cluster_options or {}
     self.input_opt=Clustering.DEFAULT_INPUT_OPT.copy()
     self.input_opt.update(input_options)
     if 'EXP_WEIGHT' in cluster_options:
         util.error_msg('Clustering.__init__: EXP_WEIGHT has been moved from cluster_options into input_options!')
     if 'DATA_COLS' in cluster_options:
         util.error_msg('Clustering.__init__: DATA_COLS should be into input_options, not cluster_options!')
     self.cluster_opt=Clustering.DEFAULT_CLUSTER_OPT.copy()
     self.cluster_opt.update(cluster_options)
     if table is not None:
         self.table=table
         if self.table.col_type(self.input_opt['ID'])!='s':
             self.table[self.input_opt['ID']]=self.table[self.input_opt['ID']].astype(str)
     if input=='' and table is None:
         util.error_msg('Clustering.__init__: Missing both input and table!')
     if type(self.input_opt['DESCRIPTION']) is str: self.input_opt['DESCRIPTION']=[self.input_opt['DESCRIPTION']]
     # Normalize weight vectors: cast to float arrays and drop them when all
     # weights are ~1.0 (a no-op weighting).  BUG FIX: the original wrote the
     # normalized values back into the caller's input_options dict, which
     # mutated the caller's data and never updated self.input_opt; store the
     # result on self.input_opt instead.
     for s_key in ('EXP_WEIGHT', 'GENE_WEIGHT'):
         R_w=self.input_opt[s_key]
         if R_w is not None and len(R_w)>0:
             R_w=util.sarray2rarray(R_w) # cast to np array
             if np.allclose(R_w, 1.0, atol=1e-5):
                 self.input_opt[s_key]=None
             else:
                 self.input_opt[s_key]=R_w
Example #30
0
def play(arg=None):
    """
    Play a MusicObject or the current queue.

    Keyword arguments:
    arg=None: A number n to play item n, 's' to play the queue in shuffle mode,
      or None to play the current queue in order.
    """
    global content

    # BUG FIX: the original used "arg is 's'", an identity comparison with a
    # string literal that only works via CPython interning (SyntaxWarning in
    # Python >= 3.8); compare by value instead.
    if arg is None or arg == 's':
        if not queue:  # Can't play an empty queue.
            error_msg(outbar, 'The queue is empty.')

        else:  # Play the queue.
            if arg == 's':  # Shuffle.
                queue.shuffle()
            content = queue.collect()
            display()
            addstr(outbar, '[spc] pause [q] stop [n] next [9-0] volume')
            queue.play(infobar)
            outbar.erase()  # Remove trailing output.
            outbar.refresh()

    elif content is None:  # Nothing to play.
        error_msg(outbar, 'Wrong context for play.')

    else:
        try:
            num = int(arg)

        except ValueError:  # arg needs to be an int if it isn't 's'.
            error_msg(outbar, 'Invalid argument to play.')

        else:
            opt = get_option(num)

            if opt is not None:  # Valid input.
                addstr(outbar, '[spc] pause [q] stop [n] next [9-0] volume')
                opt.play(infobar)
                addstr(infobar, 'Now playing: None')
                outbar.erase()
                outbar.refresh()

            else:  # num out of range.
                error_msg(
                    outbar, 'Invalid number. Valid between 1-%d' %
                    sum(len(content[k]) for k in content.keys()))