コード例 #1
0
def main(args):
    """Write a per-species count table and a species correlation matrix.

    For every species in ``args['species']`` the single file matching
    ``<hmmoutdir>/<species>*.hmmout`` is handed to ``did2count``, which is
    expected to return a mapping ``did -> count`` (it is used with ``in``,
    ``[]`` and ``.get`` below).  Two files are then written to the current
    working directory:

    * ``pfam-table``      -- tab-separated, one row per DID, one column per
      species, missing counts written as 0.
    * ``pfam-matrix.csv`` -- comma-separated square matrix holding the first
      value returned by ``stats.correlate`` for every pair of species
      (the diagonal is written as the literal ``1``).

    Args:
        args: mapping with the keys
            ``species``     list of species names (used as file prefixes),
            ``hmmoutdir``   directory searched for ``*.hmmout`` files,
            ``pidsfile``    optional; when present, a ``<species>*.gids``
                            file is looked up in ``args['pidsdir']`` and its
                            keys are passed to ``did2count``,
            ``pidsdir``     directory searched for ``*.gids`` files,
            ``ignorefile``  optional; file read with ``get_pids`` whose keys
                            are DIDs to leave out of both outputs.

    Raises:
        SystemExit: if a species does not match exactly one hmmout file (or,
            when pids are requested, exactly one gids file).
    """
    species = args['species']

    # --- collect the DID counts of every species -------------------------
    species2counts = {}
    for name in species:
        hmmoutfiles = glob.glob(args['hmmoutdir'] + "/" + name + "*.hmmout")
        if len(hmmoutfiles) != 1:
            sys.exit("ERROR: no single hmmout file found for species %s" %
                     name)
        if 'pidsfile' in args:
            # NOTE(review): the switch is 'pidsfile' but the directory is
            # read from 'pidsdir' -- confirm with the argument parser that
            # both keys are always set together.
            pidsfiles = glob.glob(args['pidsdir'] + "/" + name + "*.gids")
            if len(pidsfiles) != 1:
                sys.exit("ERROR: no single pids file found for species %s" %
                         name)
            pids = list(get_pids(pidsfiles[0]).keys())
        else:
            # did2count receives False when no pids restriction is requested.
            pids = False
        species2counts[name] = did2count(hmmoutfiles[0], pids)

    # --- union of all DIDs that are not explicitly ignored ---------------
    ignore = {}
    if 'ignorefile' in args:
        ignore = get_pids(args['ignorefile'])
    # A dict (not a set) is kept on purpose so the iteration order, and with
    # it the row order of the outputs, stays what it always was.
    wanted = {}
    for counts in species2counts.values():
        for did in counts:
            if did not in ignore:
                wanted[did] = 1
    dids = list(wanted)

    # --- tab-separated count table ---------------------------------------
    with open("pfam-table", "w") as fwt:
        fwt.write("\t".join(["DID"] + species) + "\n")
        for did in dids:
            row = [did]
            for name in species:
                row.append(str(species2counts[name].get(did, 0)))
            fwt.write("\t".join(row) + "\n")

    # --- pairwise correlation matrix -------------------------------------
    # Build each species' count vector once instead of once per pair.
    vectors = {}
    for name in species:
        counts = species2counts[name]
        vectors[name] = [counts.get(did, 0) for did in dids]

    with open("pfam-matrix.csv", "w") as fwm:
        fwm.write("," + ",".join(species) + "\n")
        for i, s1 in enumerate(species):
            cells = [s1]
            for j, s2 in enumerate(species):
                if i == j:
                    # Self-comparison is not computed, just written as 1.
                    cells.append("1")
                    continue
                cor, _p = stats.correlate(vectors[s1], vectors[s2])
                cells.append(str(cor))
            fwm.write(",".join(cells) + "\n")
コード例 #2
0
def score_pair(v1, v2, method=1):
    """Score the similarity of two equally indexed count vectors.

    Args:
        v1: sequence of numbers; its length drives every loop.
        v2: sequence of numbers, indexed at the same positions as ``v1``.
        method: selects the score:
            1 -- first value returned by ``stats.correlate(v1, v2)``;
            2 -- same, but only over positions where at least one of the
                 two vectors is non-zero;
            3 -- fraction of positions of ``v1`` at which both vectors are
                 non-zero (0.0 when ``v1`` is empty);
            4 -- number of positions at which both vectors are non-zero.

    Returns:
        The score for the chosen method, or ``None`` if ``method`` is not
        one of 1-4.
    """
    if method == 1:
        cor, _p = stats.correlate(v1, v2)
        return cor

    if method == 2:
        x1, x2 = [], []
        # v2 is indexed (not zipped) so a too-short v2 still raises
        # IndexError instead of being truncated silently.
        for i, a in enumerate(v1):
            b = v2[i]
            if a != 0 or b != 0:
                x1.append(a)
                x2.append(b)
        cor, _p = stats.correlate(x1, x2)
        return cor

    if method in (3, 4):
        shared = sum(1 for i, a in enumerate(v1) if a != 0 and v2[i] != 0)
        if method == 4:
            return shared
        total = len(v1)
        if total == 0:
            # No positions at all: report "nothing shared" rather than
            # dividing by zero.
            return 0.0
        return 1.0 * shared / total

    # Unknown method: keep the historical behaviour of returning None.
    return None
コード例 #3
0
def score_pair(v1, v2, method=1):
  """Score the similarity of two equally indexed count vectors.

  Args:
    v1: sequence of numbers; its length drives every loop.
    v2: sequence of numbers, indexed at the same positions as ``v1``.
    method: selects the score:
      1 -- first value returned by ``stats.correlate(v1, v2)``;
      2 -- same, but only over positions where at least one of the two
           vectors is non-zero;
      3 -- fraction of positions of ``v1`` at which both vectors are
           non-zero (0.0 when ``v1`` is empty);
      4 -- number of positions at which both vectors are non-zero.

  Returns:
    The score for the chosen method, or ``None`` if ``method`` is not one
    of 1-4.
  """
  if method == 1:
    cor, _p = stats.correlate(v1, v2)
    return cor

  if method == 2:
    x1, x2 = [], []
    # v2 is indexed (not zipped) so a too-short v2 still raises IndexError
    # instead of being truncated silently.
    for i, a in enumerate(v1):
      b = v2[i]
      if a != 0 or b != 0:
        x1.append(a)
        x2.append(b)
    cor, _p = stats.correlate(x1, x2)
    return cor

  if method in (3, 4):
    shared = sum(1 for i, a in enumerate(v1) if a != 0 and v2[i] != 0)
    if method == 4:
      return shared
    total = len(v1)
    if total == 0:
      # No positions at all: report "nothing shared" rather than dividing
      # by zero.
      return 0.0
    return 1.0 * shared / total

  # Unknown method: keep the historical behaviour of returning None.
  return None
コード例 #4
0
def main(args):
  """Write a per-species count table and a species correlation matrix.

  For every species in ``args['species']`` the single file matching
  ``<hmmoutdir>/<species>*.hmmout`` is handed to ``did2count``, which is
  expected to return a mapping ``did -> count`` (it is used with ``in``,
  ``[]`` and ``.get`` below).  Two files are then written to the current
  working directory:

  * ``pfam-table``      -- tab-separated, one row per DID, one column per
    species, missing counts written as 0.
  * ``pfam-matrix.csv`` -- comma-separated square matrix holding the first
    value returned by ``stats.correlate`` for every pair of species (the
    diagonal is written as the literal ``1``).

  Args:
    args: mapping with the keys
      ``species``     list of species names (used as file prefixes),
      ``hmmoutdir``   directory searched for ``*.hmmout`` files,
      ``pidsfile``    optional; when present, a ``<species>*.gids`` file is
                      looked up in ``args['pidsdir']`` and its keys are
                      passed to ``did2count``,
      ``pidsdir``     directory searched for ``*.gids`` files,
      ``ignorefile``  optional; file read with ``get_pids`` whose keys are
                      DIDs to leave out of both outputs.

  Raises:
    SystemExit: if a species does not match exactly one hmmout file (or,
      when pids are requested, exactly one gids file).
  """
  species = args['species']

  # --- collect the DID counts of every species ---------------------------
  species2counts = {}
  for name in species:
    hmmoutfiles = glob.glob(args['hmmoutdir'] + "/" + name + "*.hmmout")
    if len(hmmoutfiles) != 1:
      sys.exit("ERROR: no single hmmout file found for species %s" % name)
    if 'pidsfile' in args:
      # NOTE(review): the switch is 'pidsfile' but the directory is read
      # from 'pidsdir' -- confirm with the argument parser that both keys
      # are always set together.
      pidsfiles = glob.glob(args['pidsdir'] + "/" + name + "*.gids")
      if len(pidsfiles) != 1:
        sys.exit("ERROR: no single pids file found for species %s" % name)
      pids = list(get_pids(pidsfiles[0]).keys())
    else:
      # did2count receives False when no pids restriction is requested.
      pids = False
    species2counts[name] = did2count(hmmoutfiles[0], pids)

  # --- union of all DIDs that are not explicitly ignored -----------------
  ignore = {}
  if 'ignorefile' in args:
    ignore = get_pids(args['ignorefile'])
  # A dict (not a set) is kept on purpose so the iteration order, and with
  # it the row order of the outputs, stays what it always was.
  wanted = {}
  for counts in species2counts.values():
    for did in counts:
      if did not in ignore:
        wanted[did] = 1
  dids = list(wanted)

  # --- tab-separated count table -----------------------------------------
  with open("pfam-table", "w") as fwt:
    fwt.write("\t".join(["DID"] + species) + "\n")
    for did in dids:
      row = [did]
      for name in species:
        row.append(str(species2counts[name].get(did, 0)))
      fwt.write("\t".join(row) + "\n")

  # --- pairwise correlation matrix ---------------------------------------
  # Build each species' count vector once instead of once per pair.
  vectors = {}
  for name in species:
    counts = species2counts[name]
    vectors[name] = [counts.get(did, 0) for did in dids]

  with open("pfam-matrix.csv", "w") as fwm:
    fwm.write("," + ",".join(species) + "\n")
    for i, s1 in enumerate(species):
      cells = [s1]
      for j, s2 in enumerate(species):
        if i == j:
          # Self-comparison is not computed, just written as 1.
          cells.append("1")
          continue
        cor, _p = stats.correlate(vectors[s1], vectors[s2])
        cells.append(str(cor))
      fwm.write(",".join(cells) + "\n")