def main():
    """Print per-read information for a set of fastq file accessions.

    The accessions are taken from the first of these that is set:

    * ``args.infile``    -- a text file with one accession per line;
    * ``args.query``     -- a query whose ``@graph`` objects list ``files``;
      every listed file whose ``file_format`` is ``"fastq"`` is used;
    * ``args.accession`` -- a single accession.

    If none is set, a message is printed and the process exits with status 1.

    For every read of every accession, prints the read header when
    ``args.header`` is truthy, otherwise ``<accession>\\t<read length>``.
    """
    args = getArgs()
    key = encodedcc.ENC_Key(args.keyfile, args.key)
    connection = encodedcc.ENC_Connection(key)
    accessions = []
    if args.infile:
        # Use a context manager so the file handle is always closed, and drop
        # blank lines / stray whitespace that would yield a malformed URI.
        with open(args.infile) as infile:
            accessions = [line.strip() for line in infile if line.strip()]
    elif args.query:
        data = encodedcc.get_ENCODE(args.query, connection).get("@graph", [])
        for exp in data:
            for f in exp.get("files", []):
                res = encodedcc.get_ENCODE(f, connection)
                if res.get("file_format", "") == "fastq":
                    accessions.append(res["accession"])
    elif args.accession:
        accessions = [args.accession]
    else:
        print("No accessions to check")
        sys.exit(1)
    for acc in accessions:
        link = "/files/" + acc + "/@@download/" + acc + ".fastq.gz"
        # fastq_read is consumed as 4-tuples; header and sequence are decoded
        # as UTF-8 below, so they are presumably bytes -- TODO confirm.
        for header, sequence, _qual_header, _quality in encodedcc.fastq_read(
                connection, uri=link):
            if args.header:
                print(header.decode("UTF-8"))
            else:
                print(acc + "\t" + str(len(sequence.decode("UTF-8"))))
def process_row(row, connection):
    """Build a JSON payload dict from one row of ``column -> value`` pairs.

    Column names have the form ``name`` or ``name:type``:

    * ``flowcell``, ``machine``, ``lane`` and ``barcode`` columns (with or
      without a type suffix) are collected, unconverted, into a single dict
      stored as ``payload["flowcell_details"] = [that_dict]``.
    * ``name:int`` / ``name:integer`` values are converted with ``int()``.
    * ``name:list`` / ``name:array`` values have surrounding ``[]`` stripped
      and are split on commas.
    * Any other column is parsed as JSON; if that fails it is stored as a
      plain string.
    * Columns whose name part is empty are ignored.

    When ``row["file_format"]`` is ``"fastq"``, the reads of
    ``row["submitted_file_name"]`` are iterated via ``encodedcc.fastq_read``
    and ``read_length`` is set to the length of the last read seen.

    An integer ``paired_end`` is converted to a string; when it equals 1 any
    ``paired_with`` entry is removed.

    Args:
        row: mapping of column name to raw value.
        connection: passed through to ``encodedcc.fastq_read``.

    Returns:
        The payload dict, or ``None`` if a value could not be converted to
        JSON (a warning is logged in that case).

    Raises:
        ValueError: if an ``int``-typed column holds a non-integer value.
    """
    json_payload = {}
    flowcell_dict = {}
    if row.get("file_format", "") == "fastq":
        for _header, sequence, _qual_header, _quality in encodedcc.fastq_read(
                connection, filename=row["submitted_file_name"]):
            # Overwritten on every read, so the last read's length wins.
            json_payload["read_length"] = len(sequence.decode("UTF-8"))
    for key, value in row.items():
        parts = key.split(":")
        field = parts[0]
        type_hint = parts[1] if len(parts) > 1 else None
        if field in ("flowcell", "machine", "lane", "barcode"):
            # Look the value up by the full column name so that typed
            # columns such as "lane:int" do not raise KeyError.
            flowcell_dict[field] = value
            continue
        if not field:
            # Skip unnamed columns before attempting any conversion.
            continue
        if type_hint in ("int", "integer"):
            # Store the integer as-is; passing it through json.loads would
            # fail and fall back to a quoted string.
            json_payload[field] = int(value)
            continue
        if type_hint in ("list", "array"):
            value = value.strip("[]").split(",")
        if isinstance(value, list):
            json_payload[field] = value
            continue
        try:
            json_payload[field] = json.loads(value)
        except (TypeError, ValueError):
            # Not valid JSON on its own: treat it as a bare string.
            try:
                json_payload[field] = json.loads('"%s"' % (value,))
            except ValueError:
                logger.warning(
                    'Could not convert field %s value %s to JSON' %
                    (field, value))
                return None
    if flowcell_dict:
        json_payload["flowcell_details"] = [flowcell_dict]
    paired_end = json_payload.get("paired_end")
    # bool is a subclass of int; only genuine integers are rewritten.
    if isinstance(paired_end, int) and not isinstance(paired_end, bool):
        if paired_end == 1:
            json_payload.pop("paired_with", None)
        json_payload["paired_end"] = str(paired_end)
    print(json_payload)
    return json_payload
def process_row(row, connection):
    """Build a JSON payload dict from one row of ``column -> value`` pairs.

    Column names have the form ``name`` or ``name:type``:

    * ``flowcell``, ``machine``, ``lane`` and ``barcode`` columns (with or
      without a type suffix) are collected, unconverted, into a single dict
      stored as ``payload["flowcell_details"] = [that_dict]``.
    * ``name:int`` / ``name:integer`` values are converted with ``int()``.
    * ``name:list`` / ``name:array`` values have surrounding ``[]`` stripped
      and are split on commas.
    * Any other column is parsed as JSON; if that fails it is stored as a
      plain string.
    * Columns whose name part is empty are ignored.

    When ``row["file_format"]`` is ``"fastq"``, the reads of
    ``row["submitted_file_name"]`` are iterated via ``encodedcc.fastq_read``
    and ``read_length`` is set to the length of the last read seen.

    An integer ``paired_end`` is converted to a string; when it equals 1 any
    ``paired_with`` entry is removed.

    Args:
        row: mapping of column name to raw value.
        connection: passed through to ``encodedcc.fastq_read``.

    Returns:
        The payload dict, or ``None`` if a value could not be converted to
        JSON (a warning is logged in that case).

    Raises:
        ValueError: if an ``int``-typed column holds a non-integer value.
    """
    json_payload = {}
    flowcell_dict = {}
    if row.get("file_format", "") == "fastq":
        for _header, sequence, _qual_header, _quality in encodedcc.fastq_read(
                connection, filename=row["submitted_file_name"]):
            # Overwritten on every read, so the last read's length wins.
            json_payload["read_length"] = len(sequence.decode("UTF-8"))
    for key, value in row.items():
        parts = key.split(":")
        field = parts[0]
        type_hint = parts[1] if len(parts) > 1 else None
        if field in ("flowcell", "machine", "lane", "barcode"):
            # Look the value up by the full column name so that typed
            # columns such as "lane:int" do not raise KeyError.
            flowcell_dict[field] = value
            continue
        if not field:
            # Skip unnamed columns before attempting any conversion.
            continue
        if type_hint in ("int", "integer"):
            # Store the integer as-is; passing it through json.loads would
            # fail and fall back to a quoted string.
            json_payload[field] = int(value)
            continue
        if type_hint in ("list", "array"):
            value = value.strip("[]").split(",")
        if isinstance(value, list):
            json_payload[field] = value
            continue
        try:
            json_payload[field] = json.loads(value)
        except (TypeError, ValueError):
            # Not valid JSON on its own: treat it as a bare string.
            try:
                json_payload[field] = json.loads('"%s"' % (value,))
            except ValueError:
                logger.warning('Could not convert field %s value %s to JSON' % (field, value))
                return None
    if flowcell_dict:
        json_payload["flowcell_details"] = [flowcell_dict]
    paired_end = json_payload.get("paired_end")
    # bool is a subclass of int; only genuine integers are rewritten.
    if isinstance(paired_end, int) and not isinstance(paired_end, bool):
        if paired_end == 1:
            json_payload.pop("paired_with", None)
        json_payload["paired_end"] = str(paired_end)
    print(json_payload)
    return json_payload
Example #4
0
def main():
    """Print per-read information for a set of fastq file accessions.

    The accessions are taken from the first of these that is set:

    * ``args.infile`` -- either the path of an existing text file with one
      accession per line, or (when no such file exists) a comma-separated
      list of accessions;
    * ``args.query``  -- when it contains ``"search"`` the ``@graph`` of the
      response is used, otherwise the response itself is treated as a single
      object; every listed file whose ``file_format`` is ``"fastq"`` is used.

    If neither is set, a message is printed and the process exits with
    status 1.

    For every read of every accession, prints
    ``<accession>\\t<read length>`` followed, when ``args.header`` is truthy,
    by ``\\t<header>``.
    """
    args = getArgs()
    key = encodedcc.ENC_Key(args.keyfile, args.key)
    connection = encodedcc.ENC_Connection(key)
    accessions = []
    if args.infile:
        if os.path.isfile(args.infile):
            # Use a context manager so the file handle is always closed, and
            # drop blank lines that would yield a malformed download URI.
            with open(args.infile) as infile:
                accessions = [line.strip() for line in infile if line.strip()]
        else:
            accessions = args.infile.split(",")
    elif args.query:
        if "search" in args.query:
            data = encodedcc.get_ENCODE(args.query,
                                        connection).get("@graph", [])
        else:
            data = [encodedcc.get_ENCODE(args.query, connection)]
        for exp in data:
            for f in exp.get("files", []):
                res = encodedcc.get_ENCODE(f, connection)
                if res.get("file_format", "") == "fastq":
                    accessions.append(res["accession"])
    else:
        print("No accessions to check")
        sys.exit(1)
    for acc in accessions:
        link = "/files/" + acc + "/@@download/" + acc + ".fastq.gz"
        # fastq_read is consumed as 4-tuples; header and sequence are decoded
        # as UTF-8 below, so they are presumably bytes -- TODO confirm.
        for header, sequence, _qual_header, _quality in encodedcc.fastq_read(
                connection, uri=link):
            if args.header:
                # The length here is taken from the undecoded sequence.
                print(acc + "\t" + str(len(sequence)) + "\t"
                      + header.decode("UTF-8"))
            else:
                print(acc + "\t" + str(len(sequence.decode("UTF-8"))))