예제 #1
0
def main():
    """Build an HSX index for the fasta file(s) named on the command line.

    Arguments are read from sys.argv.  Non-option arguments are fasta file
    names (each must end in .fa or .fasta); when none are given the fasta
    data is read from stdin.  The index is emitted through the module-level
    write1/write4/write5/write6/writeString/writeZeros helpers, in this
    order: header, file table, file info, hash table, sequence table.

    Side effects: binds the module-level names write4, write5 and write6 to
    their big- or little-endian implementations, depending on --bigendian.

    Raises:
        AssertionError: for an invalid option value, a bad or repeated file
            name, a file that can't be opened, a repeated sequence name, or
            input that contains no sequences.  These are raised explicitly
            (not via assert statements) so they still fire under python -O.
    """
    global write4, write5, write6

    def report(message):
        # write one diagnostic line to stderr (same on python 2 and 3)
        sys.stderr.write(message + "\n")

    ##########
    # parse the command line
    ##########

    avgBucket = 10
    numBuckets = None
    anonymous = False
    skipHeader = False
    isWindows = False
    fileNames = []
    bigEndian = False
    oddBuckets = False
    keepEmpties = False
    screwup = []  # (so that we can verify that validation works!)
    debug = []
    progress = None

    args = sys.argv[1:]
    while args:
        arg = args.pop(0)
        val = None
        fields = arg.split("=", 1)
        if len(fields) == 2:
            (arg, val) = fields
            if val == "":
                usage("missing a value in %s=" % arg)

        if (arg in ["--help", "-h", "--h", "-help"]) and (val is None):
            usage()
        elif (arg == "--bucketsize") and (val is not None):
            try:
                avgBucket = int(val)
                if avgBucket < 1:
                    raise ValueError
            except ValueError:
                raise AssertionError("invalid bucket size: %s" % val)
        elif (arg == "--numbuckets") and (val is not None):
            try:
                numBuckets = int(val)
                if numBuckets < 1:
                    raise ValueError
            except ValueError:
                raise AssertionError("invalid number of buckets: %s" % val)
        elif (arg == "--secondary") and (val is None):
            raise AssertionError("secondary hash is not implemented yet (sorry)")
        elif (arg == "--anonymous") and (val is None):
            anonymous = True
        elif (arg == "--skipheader") and (val is None):
            skipHeader = True
        elif (arg == "--windows") and (val is None):
            isWindows = True
        elif (arg == "--bigendian") and (val is None):
            bigEndian = True
        elif (arg == "--oddbuckets") and (val is None):
            oddBuckets = True
        elif (arg == "--keepempties") and (val is None):
            keepEmpties = True
        elif (arg == "--screwup") and (val is not None):
            screwup += [val]
        elif (arg == "--debug") and (val is None):
            debug += ["debug"]
        elif (arg == "--debug") and (val is not None):
            debug += [val]
        elif (arg == "--progress") and (val is None):
            debug += ["progress"]
            progress = None
        elif (arg == "--progress") and (val is not None):
            debug += ["progress"]
            # the interval is used as a modulus below, so it must be positive
            try:
                progress = int(val)
                if progress < 1:
                    raise ValueError
            except ValueError:
                raise AssertionError("invalid progress interval: %s" % val)
        elif arg.startswith("--"):
            usage("unknown argument: %s" % arg)
        elif val is None:
            fileNames += [arg]
        else:
            usage("unknown argument: %s" % arg)

    showProgress = "progress" in debug
    countProgress = showProgress and (progress is not None)

    # sanity check on file names;  the extension must come after the last
    # slash and must be one we recognize

    for fileName in fileNames:
        slash = fileName.rfind("/")
        dot = fileName.rfind(".")
        if (dot < 0) or (dot < slash) \
                or (fileName[dot:] not in [".fa", ".fasta"]):
            raise AssertionError(
                "bad fasta file name %s (it has to end with .fa or .fasta)"
                % fileName)

    if anonymous and (len(fileNames) > 1):
        raise AssertionError(
            "can't use anonymous when you have multiple fasta files")

    # the file number is written with write1 in the sequence table
    if len(fileNames) > 255:
        raise AssertionError("too many input files (max is 255)")

    # set up big- or little-endian

    if bigEndian:
        write4 = write4_big_endian
        write5 = write5_big_endian
        write6 = write6_big_endian
    else:
        write4 = write4_little_endian
        write5 = write5_little_endian
        write6 = write6_little_endian

    ##########
    # read the fasta file(s)
    ##########

    # read the fasta file(s), collecting names, etc.;  an empty file name
    # stands for stdin

    if fileNames == []:
        fileNames += [""]

    fileNameToNum = {}
    sequences = []
    nameSeen = {}

    for (fileNum, fileName) in enumerate(fileNames):
        if fileName in fileNameToNum:
            raise AssertionError(
                "can't use the same file twice (%s)" % fileName)
        fileNameToNum[fileName] = fileNum

        if fileName == "":
            f = sys.stdin
        else:
            try:
                f = open(fileName, "rt")
            except IOError:
                raise AssertionError("unable to open %s" % fileName)

        try:
            seqNum = 0
            for seqInfo in fasta_sequences(f, twoByteLFs=isWindows):
                (name, length, lineNum, headerOffset, dataOffset) = seqInfo
                seqNum += 1

                if name in nameSeen:
                    raise AssertionError(
                        "%s is used for two sequences (at %s and %s)"
                        % (name,
                           line_reference(nameSeen[name]),
                           line_reference((fileName, lineNum))))
                nameSeen[name] = (fileName, lineNum)

                if length == 0:
                    where = line_reference((fileName, lineNum))
                    if keepEmpties:
                        report("WARNING: keeping empty sequence %s (%s)"
                               % (name, where))
                    else:
                        report("WARNING: discarding empty sequence %s (%s)"
                               % (name, where))
                        continue

                # --skipheader makes the index point at the sequence data
                # rather than at the fasta header line
                if skipHeader:
                    sequences.append((name, length, fileNum, dataOffset))
                else:
                    sequences.append((name, length, fileNum, headerOffset))

                if countProgress and (seqNum % progress == 0):
                    report("read sequence %d (%s)" % (seqNum, name))
        finally:
            # close the file even if reading failed (but never close stdin)
            if fileName != "":
                f.close()

        if showProgress:
            if fileName != "":
                report("finished reading %s" % fileName)
            else:
                report("finished reading input file")

    # scan collected sequence info and assign hash values

    numSequences = len(sequences)
    if numSequences == 0:
        raise AssertionError("input file contains no sequences!")
    if numBuckets is None:
        # integer ceiling of numSequences/avgBucket;  always at least 1
        # because both operands are at least 1
        numBuckets = (numSequences + avgBucket - 1) // avgBucket
    if oddBuckets and (numBuckets % 2 == 0):
        numBuckets += 1

    # sorting by (bucket, name, ...) groups each bucket's sequences together
    sequences = [(HsxFile.hash(name) % numBuckets, name, length, fileNum, offset)
                 for (name, length, fileNum, offset) in sequences]
    sequences.sort()

    if showProgress:
        report("finished computing hashes")

    if "info" in debug:
        for (bucket, name, length, fileNum, offset) in sequences:
            report("%10d==%08X %2d:%08X %s %d"
                   % (HsxFile.hash(name), bucket, fileNum, offset, name, length))

    ##########
    # write the index
    ##########

    # decide how we will write the file names;  each file contributes two
    # strings to the file info block (extension, then base name), and the
    # "+ 1"s below allow one extra byte per string
    # NOTE(review): presumably writeString appends a terminator byte -- confirm

    fileNumToOffset = {}
    fileNumToFastaName = {}
    fileNumToFastaExt = {}
    fileInfoLength = 0

    for fileName in fileNames:
        fileNum = fileNameToNum[fileName]

        fastaName = ""
        fastaExt = "fa"
        if fileName != "":
            dot = fileName.rfind(".")
            fastaExt = fileName[dot + 1:]
            if not anonymous:
                fastaName = fileName[:dot]

        fileNumToOffset[fileNum] = fileInfoLength
        fileNumToFastaName[fileNum] = fastaName
        fileNumToFastaExt[fileNum] = fastaExt
        fileInfoLength += len(fastaExt) + 1 + len(fastaName) + 1

    # determine header and table sizes;  the 8 accounts for the two write4
    # fields (magic, version) that precede the seven header fields

    headerLength = 0x1C
    headerPad = pad_for_16(8 + headerLength)
    headerSize = headerLength + headerPad

    numFiles = len(fileNames)
    fileTableOffset = 0x08 + headerSize
    fileTableLength = numFiles * 4
    fileTablePad = pad_for_16(fileTableLength)
    fileTableSize = fileTableLength + fileTablePad

    fileInfoOffset = fileTableOffset + fileTableSize
    fileInfoPad = pad_for_16(fileInfoLength)
    fileInfoSize = fileInfoLength + fileInfoPad

    hashTableOffset = fileInfoOffset + fileInfoSize
    hashTableLength = (numBuckets + 1) * 5  # one extra entry ends the table
    hashTablePad = pad_for_16(hashTableLength)
    if "hashpad" in screwup:
        hashTablePad = -1  # deliberately wrong, to exercise validation
    hashTableSize = hashTableLength + hashTablePad

    seqTableOffset = hashTableOffset + hashTableSize

    if "file" in debug:
        report("fileTableOffset = %08X (%08X)" % (fileTableOffset, fileTableSize))
        report("fileInfoOffset  = %08X (%08X)" % (fileInfoOffset, fileInfoSize))
        report("hashTableOffset = %08X (%08X)" % (hashTableOffset, hashTableSize))
        report("seqTableOffset  = %08X" % seqTableOffset)

    # determine offsets into the sequence table;  we record where each
    # non-empty bucket's first entry lands, keyed by bucket number so that no
    # sequence name can collide with the end-of-table marker

    bucketToOffset = {}
    entryOffset = seqTableOffset
    for (bucket, name, length, fileNum, offset) in sequences:
        if bucket not in bucketToOffset:
            bucketToOffset[bucket] = entryOffset
        # entry is write5 + write1 + write6 = 12 bytes, then the name string
        entryOffset += 12 + len(name) + 1
    endOffset = entryOffset  # offset past end of sequence index table

    # lay out the hash table, one entry per bucket plus one extra;  an empty
    # bucket has the most significant bit set and carries the offset of the
    # next non-empty bucket (or the end of the sequence table), and this
    # includes any empty buckets that precede the first non-empty one

    msBit5 = 0x80 << (4 * 8)

    hashEntries = []
    for bucket in sorted(bucketToOffset):
        firstOffset = bucketToOffset[bucket]
        hashEntries += [msBit5 + firstOffset] * (bucket - len(hashEntries))
        hashEntries.append(firstOffset)
    hashEntries += [msBit5 + endOffset] * (numBuckets + 1 - len(hashEntries))

    # write header

    write4(HsxFile.magicBig)
    write4(HsxFile.version)

    write4(headerLength)
    write4(numFiles)
    write4(fileTableOffset)
    write4(numBuckets)
    write4(hashTableOffset)
    write4(numSequences)
    write4(seqTableOffset)
    writeZeros(headerPad)

    if showProgress:
        report("finished writing header")

    # write file table and file info

    for fileName in fileNames:
        fileNum = fileNameToNum[fileName]
        write4(fileInfoOffset + fileNumToOffset[fileNum])
    writeZeros(fileTablePad)

    for fileName in fileNames:
        fileNum = fileNameToNum[fileName]
        writeString(fileNumToFastaExt[fileNum])
        writeString(fileNumToFastaName[fileNum])
    writeZeros(fileInfoPad)

    if showProgress:
        report("finished writing file table")

    # write hash table

    for (entryNum, entry) in enumerate(hashEntries):
        write5(entry)
        if countProgress and ((entryNum + 1) % progress == 0):
            report("wrote hash bucket %d" % (entryNum + 1))

    writeZeros(hashTablePad)

    if showProgress:
        report("finished writing hash table")

    # write sequence table

    for (seqNum, (bucket, name, length, fileNum, offset)) in enumerate(sequences):
        write5(length)  # length of the sequence
        write1(fileNum)  # file number (index into file table)
        write6(offset)  # offset to the sequence data
        writeString(name)  # name of sequence
        if countProgress and ((seqNum + 1) % progress == 0):
            report("wrote sequence entry %d" % (seqNum + 1))

    if showProgress:
        report("finished writing index")
예제 #2
0
def main():
	"""Fetch named sequences from an HSX-indexed file and print them.

	Arguments are read from sys.argv.  The first non-option argument is the
	hsx file name;  any further non-option arguments are sequence names.
	--names=<file> adds one sequence name per line of <file>.  Each sequence
	that is found is printed to stdout;  a name that is not found produces a
	warning on stderr unless --nowarn was given.  With --progress the name of
	each fetched sequence is also echoed to stderr.
	"""

	##########
	# parse the command line
	##########

	hsxFileName   = None
	seqNames      = []
	warnOnMissing = True
	showProgress  = False
	debug         = []

	args = sys.argv[1:]
	while (len(args) > 0):
		arg = args.pop(0)
		val = None
		fields = arg.split("=",1)
		if (len(fields) == 2):
			(arg,val) = fields
			if (val == ""):
				usage("missing a value in %s=" % arg)

		if (arg in ["--help","-h","--h","-help"]) and (val is None):
			usage()
		elif (arg == "--names") and (val is not None):
			# "with" closes the names file even if reading it fails
			with open(val) as f:
				seqNames += [line.strip() for line in f]
		elif (arg == "--nowarn") and (val is None):
			warnOnMissing = False
		elif (arg == "--progress") and (val is None):
			showProgress = True
		elif (arg == "--debug") and (val is None):
			debug += ["debug"]
		elif (arg == "--debug") and (val is not None):
			debug += [val]
		elif (arg.startswith("--")):
			usage("unknown argument: %s" % arg)
		elif (hsxFileName is None) and (val is None):
			hsxFileName = arg
		elif (val is None):
			seqNames += [arg]
		else:
			usage("unknown argument: %s" % arg)

	if (hsxFileName is None): usage("you must give me an hsx file!")
	if (seqNames    == []):   usage("you must give me some sequence names!")

	##########
	# fetch the sequences
	##########

	hsx = HsxFile(hsxFileName,debug=debug)
	try:
		for name in seqNames:
			seq = hsx.get_sequence(name)
			if (seq is not None):
				print (seq)
				if (showProgress):
					print (name,file=sys.stderr)
			elif (warnOnMissing):
				print ("WARNING: %s not found" % name,
				       file=sys.stderr)
	finally:
		# release the hsx file even if a lookup raised
		hsx.close()
예제 #3
0
def main():
    """Build an HSX index for the fasta file(s) named on the command line.

    Arguments are read from sys.argv.  Non-option arguments are fasta file
    names (each must end in .fa or .fasta); when none are given the fasta
    data is read from stdin.  The index is emitted through the module-level
    write1/write4/write5/write6/writeString/writeZeros helpers, in this
    order: header, file table, file info, hash table, sequence table.

    Side effects: binds the module-level names write4, write5 and write6 to
    their big- or little-endian implementations, depending on --bigendian.

    Raises:
        AssertionError: for an invalid option value, a bad or repeated file
            name, a file that can't be opened, a repeated sequence name, or
            input that contains no sequences.  These are raised explicitly
            (not via assert statements) so they still fire under python -O.
    """
    global write4, write5, write6

    def report(message):
        # write one diagnostic line to stderr (same on python 2 and 3)
        sys.stderr.write(message + "\n")

    ##########
    # parse the command line
    ##########

    avgBucket = 10
    numBuckets = None
    anonymous = False
    skipHeader = False
    isWindows = False
    fileNames = []
    bigEndian = False
    oddBuckets = False
    keepEmpties = False
    screwup = []  # (so that we can verify that validation works!)
    debug = []
    progress = None

    args = sys.argv[1:]
    while args:
        arg = args.pop(0)
        val = None
        fields = arg.split("=", 1)
        if len(fields) == 2:
            (arg, val) = fields
            if val == "":
                usage("missing a value in %s=" % arg)

        if (arg in ["--help", "-h", "--h", "-help"]) and (val is None):
            usage()
        elif (arg == "--bucketsize") and (val is not None):
            try:
                avgBucket = int(val)
                if avgBucket < 1:
                    raise ValueError
            except ValueError:
                raise AssertionError("invalid bucket size: %s" % val)
        elif (arg == "--numbuckets") and (val is not None):
            try:
                numBuckets = int(val)
                if numBuckets < 1:
                    raise ValueError
            except ValueError:
                raise AssertionError("invalid number of buckets: %s" % val)
        elif (arg == "--secondary") and (val is None):
            raise AssertionError("secondary hash is not implemented yet (sorry)")
        elif (arg == "--anonymous") and (val is None):
            anonymous = True
        elif (arg == "--skipheader") and (val is None):
            skipHeader = True
        elif (arg == "--windows") and (val is None):
            isWindows = True
        elif (arg == "--bigendian") and (val is None):
            bigEndian = True
        elif (arg == "--oddbuckets") and (val is None):
            oddBuckets = True
        elif (arg == "--keepempties") and (val is None):
            keepEmpties = True
        elif (arg == "--screwup") and (val is not None):
            screwup += [val]
        elif (arg == "--debug") and (val is None):
            debug += ["debug"]
        elif (arg == "--debug") and (val is not None):
            debug += [val]
        elif (arg == "--progress") and (val is None):
            debug += ["progress"]
            progress = None
        elif (arg == "--progress") and (val is not None):
            debug += ["progress"]
            # the interval is used as a modulus below, so it must be positive
            try:
                progress = int(val)
                if progress < 1:
                    raise ValueError
            except ValueError:
                raise AssertionError("invalid progress interval: %s" % val)
        elif arg.startswith("--"):
            usage("unknown argument: %s" % arg)
        elif val is None:
            fileNames += [arg]
        else:
            usage("unknown argument: %s" % arg)

    showProgress = "progress" in debug
    countProgress = showProgress and (progress is not None)

    # sanity check on file names;  the extension must come after the last
    # slash and must be one we recognize

    for fileName in fileNames:
        slash = fileName.rfind("/")
        dot = fileName.rfind(".")
        if (dot < 0) or (dot < slash) \
                or (fileName[dot:] not in [".fa", ".fasta"]):
            raise AssertionError(
                "bad fasta file name %s (it has to end with .fa or .fasta)"
                % fileName)

    if anonymous and (len(fileNames) > 1):
        raise AssertionError(
            "can't use anonymous when you have multiple fasta files")

    # the file number is written with write1 in the sequence table
    if len(fileNames) > 255:
        raise AssertionError("too many input files (max is 255)")

    # set up big- or little-endian

    if bigEndian:
        write4 = write4_big_endian
        write5 = write5_big_endian
        write6 = write6_big_endian
    else:
        write4 = write4_little_endian
        write5 = write5_little_endian
        write6 = write6_little_endian

    ##########
    # read the fasta file(s)
    ##########

    # read the fasta file(s), collecting names, etc.;  an empty file name
    # stands for stdin

    if fileNames == []:
        fileNames += [""]

    fileNameToNum = {}
    sequences = []
    nameSeen = {}

    for (fileNum, fileName) in enumerate(fileNames):
        if fileName in fileNameToNum:
            raise AssertionError(
                "can't use the same file twice (%s)" % fileName)
        fileNameToNum[fileName] = fileNum

        if fileName == "":
            f = sys.stdin
        else:
            try:
                f = open(fileName, "rt")
            except IOError:
                raise AssertionError("unable to open %s" % fileName)

        try:
            seqNum = 0
            for seqInfo in fasta_sequences(f, twoByteLFs=isWindows):
                (name, length, lineNum, headerOffset, dataOffset) = seqInfo
                seqNum += 1

                if name in nameSeen:
                    raise AssertionError(
                        "%s is used for two sequences (at %s and %s)"
                        % (name,
                           line_reference(nameSeen[name]),
                           line_reference((fileName, lineNum))))
                nameSeen[name] = (fileName, lineNum)

                if length == 0:
                    where = line_reference((fileName, lineNum))
                    if keepEmpties:
                        report("WARNING: keeping empty sequence %s (%s)"
                               % (name, where))
                    else:
                        report("WARNING: discarding empty sequence %s (%s)"
                               % (name, where))
                        continue

                # --skipheader makes the index point at the sequence data
                # rather than at the fasta header line
                if skipHeader:
                    sequences.append((name, length, fileNum, dataOffset))
                else:
                    sequences.append((name, length, fileNum, headerOffset))

                if countProgress and (seqNum % progress == 0):
                    report("read sequence %d (%s)" % (seqNum, name))
        finally:
            # close the file even if reading failed (but never close stdin)
            if fileName != "":
                f.close()

        if showProgress:
            if fileName != "":
                report("finished reading %s" % fileName)
            else:
                report("finished reading input file")

    # scan collected sequence info and assign hash values

    numSequences = len(sequences)
    if numSequences == 0:
        raise AssertionError("input file contains no sequences!")
    if numBuckets is None:
        # integer ceiling of numSequences/avgBucket;  done in integer
        # arithmetic so it can't truncate to zero buckets, and always at
        # least 1 because both operands are at least 1
        numBuckets = (numSequences + avgBucket - 1) // avgBucket
    if oddBuckets and (numBuckets % 2 == 0):
        numBuckets += 1

    # sorting by (bucket, name, ...) groups each bucket's sequences together
    sequences = [(HsxFile.hash(name) % numBuckets, name, length, fileNum, offset)
                 for (name, length, fileNum, offset) in sequences]
    sequences.sort()

    if showProgress:
        report("finished computing hashes")

    if "info" in debug:
        for (bucket, name, length, fileNum, offset) in sequences:
            report("%10d==%08X %2d:%08X %s %d"
                   % (HsxFile.hash(name), bucket, fileNum, offset, name, length))

    ##########
    # write the index
    ##########

    # decide how we will write the file names;  each file contributes two
    # strings to the file info block (extension, then base name), and the
    # "+ 1"s below allow one extra byte per string
    # NOTE(review): presumably writeString appends a terminator byte -- confirm

    fileNumToOffset = {}
    fileNumToFastaName = {}
    fileNumToFastaExt = {}
    fileInfoLength = 0

    for fileName in fileNames:
        fileNum = fileNameToNum[fileName]

        fastaName = ""
        fastaExt = "fa"
        if fileName != "":
            dot = fileName.rfind(".")
            fastaExt = fileName[dot + 1:]
            if not anonymous:
                fastaName = fileName[:dot]

        fileNumToOffset[fileNum] = fileInfoLength
        fileNumToFastaName[fileNum] = fastaName
        fileNumToFastaExt[fileNum] = fastaExt
        fileInfoLength += len(fastaExt) + 1 + len(fastaName) + 1

    # determine header and table sizes;  the 8 accounts for the two write4
    # fields (magic, version) that precede the seven header fields

    headerLength = 0x1C
    headerPad = pad_for_16(8 + headerLength)
    headerSize = headerLength + headerPad

    numFiles = len(fileNames)
    fileTableOffset = 0x08 + headerSize
    fileTableLength = numFiles * 4
    fileTablePad = pad_for_16(fileTableLength)
    fileTableSize = fileTableLength + fileTablePad

    fileInfoOffset = fileTableOffset + fileTableSize
    fileInfoPad = pad_for_16(fileInfoLength)
    fileInfoSize = fileInfoLength + fileInfoPad

    hashTableOffset = fileInfoOffset + fileInfoSize
    hashTableLength = (numBuckets + 1) * 5  # one extra entry ends the table
    hashTablePad = pad_for_16(hashTableLength)
    if "hashpad" in screwup:
        hashTablePad = -1  # deliberately wrong, to exercise validation
    hashTableSize = hashTableLength + hashTablePad

    seqTableOffset = hashTableOffset + hashTableSize

    if "file" in debug:
        report("fileTableOffset = %08X (%08X)" % (fileTableOffset, fileTableSize))
        report("fileInfoOffset  = %08X (%08X)" % (fileInfoOffset, fileInfoSize))
        report("hashTableOffset = %08X (%08X)" % (hashTableOffset, hashTableSize))
        report("seqTableOffset  = %08X" % seqTableOffset)

    # determine offsets into the sequence table;  we record where each
    # non-empty bucket's first entry lands, keyed by bucket number so that no
    # sequence name can collide with the end-of-table marker

    bucketToOffset = {}
    entryOffset = seqTableOffset
    for (bucket, name, length, fileNum, offset) in sequences:
        if bucket not in bucketToOffset:
            bucketToOffset[bucket] = entryOffset
        # entry is write5 + write1 + write6 = 12 bytes, then the name string
        entryOffset += 12 + len(name) + 1
    endOffset = entryOffset  # offset past end of sequence index table

    # lay out the hash table, one entry per bucket plus one extra;  an empty
    # bucket has the most significant bit set and carries the offset of the
    # next non-empty bucket (or the end of the sequence table), and this
    # includes any empty buckets that precede the first non-empty one

    msBit5 = 0x80 << (4 * 8)

    hashEntries = []
    for bucket in sorted(bucketToOffset):
        firstOffset = bucketToOffset[bucket]
        hashEntries += [msBit5 + firstOffset] * (bucket - len(hashEntries))
        hashEntries.append(firstOffset)
    hashEntries += [msBit5 + endOffset] * (numBuckets + 1 - len(hashEntries))

    # write header

    write4(HsxFile.magicBig)
    write4(HsxFile.version)

    write4(headerLength)
    write4(numFiles)
    write4(fileTableOffset)
    write4(numBuckets)
    write4(hashTableOffset)
    write4(numSequences)
    write4(seqTableOffset)
    writeZeros(headerPad)

    if showProgress:
        report("finished writing header")

    # write file table and file info

    for fileName in fileNames:
        fileNum = fileNameToNum[fileName]
        write4(fileInfoOffset + fileNumToOffset[fileNum])
    writeZeros(fileTablePad)

    for fileName in fileNames:
        fileNum = fileNameToNum[fileName]
        writeString(fileNumToFastaExt[fileNum])
        writeString(fileNumToFastaName[fileNum])
    writeZeros(fileInfoPad)

    if showProgress:
        report("finished writing file table")

    # write hash table

    for (entryNum, entry) in enumerate(hashEntries):
        write5(entry)
        if countProgress and ((entryNum + 1) % progress == 0):
            report("wrote hash bucket %d" % (entryNum + 1))

    writeZeros(hashTablePad)

    if showProgress:
        report("finished writing hash table")

    # write sequence table

    for (seqNum, (bucket, name, length, fileNum, offset)) in enumerate(sequences):
        write5(length)  # length of the sequence
        write1(fileNum)  # file number (index into file table)
        write6(offset)  # offset to the sequence data
        writeString(name)  # name of sequence
        if countProgress and ((seqNum + 1) % progress == 0):
            report("wrote sequence entry %d" % (seqNum + 1))

    if showProgress:
        report("finished writing index")
예제 #4
0
def main():
	"""Fetch named sequences from an HSX-indexed file and print them.

	Arguments are read from sys.argv.  The first non-option argument is the
	hsx file name;  any further non-option arguments are sequence names.
	--names=<file> adds one sequence name per line of <file>.  Each sequence
	that is found is printed to stdout;  a name that is not found produces a
	warning on stderr unless --nowarn was given.  With --progress the name of
	each fetched sequence is also echoed to stderr.

	Output is written in a way that behaves the same on python 2 and 3.
	"""

	##########
	# parse the command line
	##########

	hsxFileName   = None
	seqNames      = []
	warnOnMissing = True
	showProgress  = False
	debug         = []

	args = sys.argv[1:]
	while (len(args) > 0):
		arg = args.pop(0)
		val = None
		fields = arg.split("=",1)
		if (len(fields) == 2):
			(arg,val) = fields
			if (val == ""):
				usage("missing a value in %s=" % arg)

		if (arg in ["--help","-h","--h","-help"]) and (val is None):
			usage()
		elif (arg == "--names") and (val is not None):
			# "with" closes the names file even if reading it fails
			with open(val) as f:
				seqNames += [line.strip() for line in f]
		elif (arg == "--nowarn") and (val is None):
			warnOnMissing = False
		elif (arg == "--progress") and (val is None):
			showProgress = True
		elif (arg == "--debug") and (val is None):
			debug += ["debug"]
		elif (arg == "--debug") and (val is not None):
			debug += [val]
		elif (arg.startswith("--")):
			usage("unknown argument: %s" % arg)
		elif (hsxFileName is None) and (val is None):
			hsxFileName = arg
		elif (val is None):
			seqNames += [arg]
		else:
			usage("unknown argument: %s" % arg)

	if (hsxFileName is None): usage("you must give me an hsx file!")
	if (seqNames    == []):   usage("you must give me some sequence names!")

	##########
	# fetch the sequences
	##########

	hsx = HsxFile(hsxFileName,debug=debug)
	try:
		for name in seqNames:
			seq = hsx.get_sequence(name)
			if (seq is not None):
				# single parenthesized argument, so this prints the same
				# thing whether print is a statement or a function
				print(seq)
				if (showProgress):
					sys.stderr.write(name + "\n")
			elif (warnOnMissing):
				sys.stderr.write("WARNING: %s not found" % name + "\n")
	finally:
		# release the hsx file even if a lookup raised
		hsx.close()