Ejemplo n.º 1
0
def main():
    """Append an ATC level-3 description to each CSV line read from stdin.

    Connects to the database named by the CHHOST, CHUSER, CHPWD, CHPORT and
    CHDB environment variables.  For every input line the fourth field is
    used as the level-3 code to look up in ``atc_classification``.  When a
    row is found, the first four input fields, the description (lower-cased
    and passed through ``Datahelper.get_normalised_phrase``) and the fifth
    input field are printed as one comma-separated line.  Lines without a
    match produce no output.

    Returns:
        The total number of lines written.  (Previously the counter was
        reset for every input line, so only the last line was reflected.)
    """
    dh = Datahelper()
    try:
        chembl = pymysql.connect(host=os.environ["CHHOST"],
                                 user=os.environ["CHUSER"],
                                 passwd=os.environ["CHPWD"],
                                 port=int(os.environ["CHPORT"]),
                                 db=os.environ["CHDB"])
    except Exception:
        print("Unexpected error: %s" % (sys.exc_info(),))
        sys.exit()

    # The code comes straight from stdin, so it is bound as a query
    # parameter instead of being interpolated into the SQL text.
    query = ("select level3_description from atc_classification "
             "where level3 = %s limit 1")

    count = 0
    try:
        for line in sys.stdin:
            data = line.strip().split(',')
            atc_code = data[3]

            with chembl.cursor() as cursor:
                cursor.execute(query, (atc_code,))
                for row in cursor:
                    count += 1
                    descr = dh.get_normalised_phrase(row[0].lower())
                    print(','.join(data[:4] + [descr, data[4]]))
    finally:
        # Release the connection even if a malformed line aborts the loop.
        chembl.close()

    return count
Ejemplo n.º 2
0
def main(options):
    """Annotate each CSV line on stdin with synonyms from ``options.synfile``.

    The synonyms file is read once with ``load_synonyms``.  For each input
    line the second field is lower-cased and expanded into candidate keys by
    ``Datahelper.get_merge_key_list``.  The first key present in the synonym
    lookup wins and the line is printed as ``id,phrase,syn1|syn2|...``.
    When no key matches, ``id,phrase`` is printed instead.

    Args:
        options: object exposing ``synfile``, the path of the synonyms file.

    Returns:
        A ``(count, mcount)`` tuple: lines read and lines with a match.
    """
    dh = Datahelper()

    try:
        # The context manager closes the file once the lookup is built.
        with open(options.synfile, "r") as fh:
            synonyms = load_synonyms(fh)
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        sys.exit()
    except TypeError as e:
        print("Missing arguments  %s" % (e,))
        sys.exit()
    except Exception:
        print("Unexpected error: %s" % (sys.exc_info(),))
        sys.exit()

    count = 0
    mcount = 0
    for line in sys.stdin:
        count += 1
        data = line.strip().split(',')
        phrase = data[1].lower()
        for key in dh.get_merge_key_list(phrase):
            if key in synonyms:
                print("%s,%s,%s" % (data[0], phrase, '|'.join(synonyms[key])))
                mcount += 1
                break
        else:
            # No candidate key matched: echo the record without synonyms.
            print("%s,%s" % (data[0], phrase))

    return count, mcount
Ejemplo n.º 3
0
def main():
    """Append a combined ATC description to each CSV line read from stdin.

    The first field of every input line is used as the level-3 code to look
    up in ``atc_classification``.  When a row is found, the line is printed
    with one extra field of the form
    ``<level1 description>: <normalised level2 + level3 description>``.
    Lines without a match produce no output.

    The database connection uses the CHHOST, CHUSER, CHPWD, CHPORT and CHDB
    environment variables.  The original ``try`` body was empty, leaving
    ``chembl`` undefined; the connection is restored here in the same form
    the sibling scripts use.

    Returns:
        The total number of lines written.
    """
    # Imported locally so this block does not rely on module-level imports
    # that are not visible from here.
    import os
    import pymysql

    dh = Datahelper()
    try:
        chembl = pymysql.connect(host=os.environ["CHHOST"],
                                 user=os.environ["CHUSER"],
                                 passwd=os.environ["CHPWD"],
                                 port=int(os.environ["CHPORT"]),
                                 db=os.environ["CHDB"])
    except Exception:
        print("Unexpected error: %s" % (sys.exc_info(),))
        sys.exit()

    # The code comes from stdin, so bind it as a parameter rather than
    # formatting it into the SQL text.
    query = ("select level1_description, level2_description, "
             "level3_description from atc_classification "
             "where level3 = %s limit 1")

    count = 0
    try:
        for line in sys.stdin:
            data = line.strip().split(',')
            atc_code = data[0]

            with chembl.cursor() as cursor:
                cursor.execute(query, (atc_code,))
                for row in cursor:
                    count += 1
                    descr = row[0].lower() + ": " + dh.get_normalised_phrase(
                        row[1].lower() + " " + row[2].lower())
                    data.append(descr)
                    print(','.join(data))
    finally:
        chembl.close()

    return count
def main(options):
    """Emit a phenotype table for stdin codes found in ``options.bnfcodes``.

    The code list is loaded once with ``load_code_list``.  A fixed header
    line is printed, then every input line whose first field is present in
    the lookup is printed with three extra fields: the code itself, the
    looked-up text passed through ``Datahelper.get_normalised_phrase`` and
    ``Datahelper.make_pheno_string``, and the literal ``BINARY``.  Lines
    whose code is not in the lookup are dropped.

    Args:
        options: object exposing ``bnfcodes``, the path of the code list.

    Returns:
        The total number of data lines written.  (Previously the counter was
        reset for every input line.)
    """
    dh = Datahelper()
    try:
        # Close the code list as soon as the lookup has been built.
        with open(options.bnfcodes, "r") as fh:
            code_lookup = load_code_list(fh)
    except Exception:
        print("Unexpected error: %s" % (sys.exc_info(),))
        sys.exit()

    print("pheno,PHENOTYPE,Category,type")

    count = 0
    for line in sys.stdin:
        data = line.strip().split(',')
        code = data[0]

        if code in code_lookup:
            count += 1
            pheno_string = dh.get_normalised_phrase(code_lookup[code])
            pheno_string = dh.make_pheno_string(pheno_string)
            data.extend([code, pheno_string, "BINARY"])
            print(','.join(data))

    return count
Ejemplo n.º 5
0
def main():
    """Print the words from ``Datahelper.get_excluded_words``, four per line.

    Words are written comma-separated in the order the helper yields them.
    A final shorter line is written when the number of words is not a
    multiple of four.  Nothing is printed when there are no words.
    """
    try:
        dh = Datahelper()
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        print("I/O error: %s" % (sys.exc_info(),))
        sys.exit()
    except TypeError as e:
        print("Missing arguments  %s" % (e,))
        sys.exit()
    except Exception:
        # Deliberately not a bare except: Ctrl-C and SystemExit should
        # propagate instead of being reported as an unexpected error.
        print("Unexpected error: %s" % (sys.exc_info(),))
        sys.exit()

    words_per_line = 4
    outarray = []
    for wd in dh.get_excluded_words():
        outarray.append(wd)
        if len(outarray) == words_per_line:
            print(",".join(outarray))
            outarray = []
    if outarray:
        # Flush the trailing partial row.
        print(",".join(outarray))
Ejemplo n.º 6
0
def main():
    """Collapse tab-separated synonym rows on stdin into one line per molregno.

    Requires the input to be sorted on the first field (the molregno).
    Each row contributes ``<type>:<text>``, where the text is the second
    field passed through ``Datahelper.get_normalised_phrase`` and the type
    is the third field.  Duplicates within a group are dropped, keeping
    first-seen order.  One record per molregno is written as
    ``syn1|syn2|...|MOLREGNO:<molregno>``.

    Empty input produces no output.  (Previously a bare ``|MOLREGNO:`` line
    was written.)
    """
    last_molno = ""
    related_synonyms = []
    dh = Datahelper()

    for line in sys.stdin:
        data = line.strip().split('\t')
        if data[0] != last_molno and last_molno != "":
            # The key changed: flush the group collected so far.
            print('|'.join(related_synonyms) + "|MOLREGNO:" + last_molno)
            related_synonyms = []
        last_molno = data[0]
        syn = data[2] + ":" + dh.get_normalised_phrase(data[1])
        if syn not in related_synonyms:
            related_synonyms.append(syn)

    # Output the last synonym group, but only if any row was read at all.
    if related_synonyms:
        print('|'.join(related_synonyms) + "|MOLREGNO:" + last_molno)
Ejemplo n.º 7
0
def main(options):
    """Reformat the first two fields of each CSV line on stdin and echo it.

    The first field is replaced by the result of
    ``Datahelper.format_atc_code`` called with ``int(options.codelen)``.
    The second field is lower-cased.  Remaining fields pass through
    unchanged.
    """
    helper = Datahelper()

    for raw in sys.stdin:
        fields = raw.strip().split(',')
        formatted = helper.format_atc_code(fields[0], int(options.codelen))
        lowered = fields[1].lower()
        print(','.join([formatted, lowered] + fields[2:]))
Ejemplo n.º 8
0
def main(options):
    """Reformat the third field of each CSV line on stdin and echo the line.

    The third field is replaced by the result of
    ``Datahelper.format_atc_code`` called with ``int(options.codelen)``.
    All other fields pass through unchanged.

    Returns:
        The number of input lines processed.
    """
    helper = Datahelper()
    count = 0

    for count, raw in enumerate(sys.stdin, 1):
        fields = raw.strip().split(',')
        formatted = helper.format_atc_code(fields[2], int(options.codelen))
        print(','.join(fields[:2] + [formatted] + fields[3:]))

    return count
def main(options):
    """Match medication phrases on stdin against the classification codes.

    The classification phrases in ``options.clsfile`` are loaded into a
    ``Datahelper`` with ``load_cls_phrases``.  The first line of stdin is
    treated as a header and discarded.  For every following CSV line the
    second field, plus the ``|``-separated entries of the third field when
    the line has exactly three fields, are passed to
    ``Datahelper.match_all_phrases``.

    Current policy: one output line per matched code, so an input record can
    yield several lines.  Each carries the first two input fields, the last
    match, the ``|``-joined match data, the two ``~``-separated parts of the
    code element (second part first) and the number of matched codes.
    Records with no match are written once with ``NA,NA,0``.

    SEE ALSO: datahelper.py

    Args:
        options: object exposing ``clsfile``, the classification file path.

    Returns:
        A ``(count, match_count, miss_count)`` tuple: records read, records
        with at least one code, and records with none.
    """
    # Try to load the classification system codes file; the context manager
    # closes it once the phrases have been loaded.
    try:
        with open(options.clsfile, "r") as fh:
            dh = Datahelper()
            dh.load_cls_phrases(fh)
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        print("I/O error: %s" % (sys.exc_info(),))
        sys.exit()
    except TypeError as e:
        print("Missing arguments  %s" % (e,))
        sys.exit()
    except Exception:
        print("Unexpected error: %s" % (sys.exc_info(),))
        sys.exit()

    count = 0
    match_count = 0
    miss_count = 0

    # stdin carries the medications coding data; skip its header line.
    sys.stdin.readline()
    for line in sys.stdin:
        count += 1
        data = line.strip().split(',')
        all_phrases = [data[1]]
        if len(data) == 3:
            all_phrases += data[2].split('|')

        code_array, match_data, last_match, _selected = dh.match_all_phrases(
            all_phrases)
        matches = '|'.join(match_data)

        if code_array:
            for code_elem in code_array:
                code_data = code_elem.split("~")
                print("%s,%s,%s,%s,%s,%s,%d" % (
                    data[0], data[1], last_match, matches,
                    code_data[1], code_data[0], len(code_array)))
            match_count += 1
        else:
            print("%s,%s,%s,%s,%s,%s,0" % (
                data[0], data[1], last_match, matches, "NA", "NA"))
            miss_count += 1

    return count, match_count, miss_count
Ejemplo n.º 10
0
def main():
    """Echo the stdin header, then reformat the first two fields of each row.

    The header line is printed stripped of surrounding whitespace.  For each
    following CSV line the first field is replaced by the result of
    ``Datahelper.format_digit_code(field, 3)`` and the second field is
    lower-cased.  Remaining fields pass through unchanged.

    Returns:
        The number of data lines processed (the header is not counted).
    """
    helper = Datahelper()
    header = sys.stdin.readline()
    print(header.strip())

    count = 0
    for count, raw in enumerate(sys.stdin, 1):
        fields = raw.strip().split(',')
        code = helper.format_digit_code(fields[0], 3)
        label = fields[1].lower()
        print(','.join([code, label] + fields[2:]))

    return count
Ejemplo n.º 11
0
def main():
    """Swap the first two tab-separated fields of every line on stdin.

    Each line is written back as ``<second field>\\t<first field>``; any
    further fields are dropped.
    """
    # The helper is not used below; it is still constructed so that any
    # side effects of creating it are retained.
    Datahelper()

    for raw in sys.stdin:
        fields = raw.strip().split('\t')
        swapped = (fields[1], fields[0])
        print("%s\t%s" % swapped)
def main(options):
    """Emit a phenotype table using ATC descriptions from the CHEMBL db.

    For each CSV line on stdin the first field is looked up in
    ``atc_classification`` at the level given by ``options.level``.  When a
    row is found, the line is printed with three extra fields:
    ``<code>_<pheno string>``, the pheno string itself and the literal
    ``BINARY``.  The pheno string is the description passed through
    ``Datahelper.get_normalised_phrase`` and ``Datahelper.make_pheno_string``.
    A fixed header line is printed first; lines without a match are dropped.

    The connection uses the CHHOST, CHUSER, CHPWD, CHPORT and CHDB
    environment variables.

    Args:
        options: object exposing ``level``, the ATC level as int or digits.

    Returns:
        The total number of data lines written.  (Previously the counter was
        reset for every input line.)
    """
    level = int(options.level)

    dh = Datahelper()
    try:
        chembl = pymysql.connect(host=os.environ["CHHOST"],
                                 user=os.environ["CHUSER"],
                                 passwd=os.environ["CHPWD"],
                                 port=int(os.environ["CHPORT"]),
                                 db=os.environ["CHDB"])
    except Exception:
        print("Unexpected error: %s" % (sys.exc_info(),))
        sys.exit()

    print("pheno,PHENOTYPE,Category,type")

    # Column names cannot be bound as parameters, but ``level`` has already
    # been forced to an int.  The code from stdin is bound via the driver
    # (the doubled %% leaves a literal %s placeholder in the SQL text).
    query = ("select level%d_description from atc_classification "
             "where level%d = %%s limit 1" % (level, level))

    count = 0
    try:
        for line in sys.stdin:
            data = line.strip().split(',')
            atc_code = data[0]

            # The context manager closes the cursor after each lookup.
            with chembl.cursor() as cursor:
                cursor.execute(query, (atc_code,))
                for row in cursor:
                    count += 1
                    pheno_string = dh.get_normalised_phrase(row[0])
                    pheno_string = dh.make_pheno_string(pheno_string)
                    data.append(data[0] + "_" + pheno_string)
                    data.append(pheno_string)
                    data.append("BINARY")
                    print(','.join(data))
    finally:
        chembl.close()

    return count
def main(options):
    """Append a ``cs_code`` column to the CSV on stdin.

    The lookup is built from ``options.codefile`` with ``load_cs_codes``.
    The header line is echoed with ``,cs_code`` appended.  Each data line
    first gets a trailing ``NA`` field.  If its second field is in the
    lookup, the line is printed once per associated code with that code in
    place of ``NA``; otherwise it is printed once with ``NA``.

    Args:
        options: object exposing ``codefile``, the path of the code file.

    Returns:
        A ``(count, match_count, miss_count)`` tuple: data lines read, lines
        written with a code, and lines written with ``NA``.
    """
    dh = Datahelper()  # not used below; kept in case construction has side effects

    try:
        # Close the code file as soon as the lookup has been built.
        with open(options.codefile, "r") as fh:
            code_lookup = load_cs_codes(fh)
    except IOError as e:
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        sys.exit()
    except TypeError as e:
        print("Missing arguments  %s" % (e,))
        sys.exit()
    except Exception:
        print("Unexpected error: %s" % (sys.exc_info(),))
        sys.exit()

    hdr = sys.stdin.readline().strip()
    print("%s,%s" % (hdr, "cs_code"))

    count = 0
    match_count = 0
    miss_count = 0
    for line in sys.stdin:
        count += 1
        data = line.strip().split(',')
        data.append("NA")
        if data[1] in code_lookup:
            for code in code_lookup[data[1]]:
                data[-1] = code
                match_count += 1
                print(",".join(data))
        else:
            miss_count += 1
            print(",".join(data))

    return count, match_count, miss_count