Esempio n. 1
0
    def __init__(self,
                 collection,
                 feature,
                 distance,
                 tpp='lemm',
                 rootpath=ROOT_PATH):
        """Load the k-NN feature searcher and tag records for a collection.

        Reads shape.txt for (nr_of_images, ndims), loads the binary
        feature matrix through simpleknn, and parses the collection's
        id/userid/tags file into a RecordStore.
        """
        feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
        # shape.txt holds two ints: number of images and feature dimension.
        shape_file = os.path.join(feat_dir, 'shape.txt')
        nr_of_images, ndims = [int(tok)
                               for tok in open(shape_file).readline().split()]

        self.searcher = simpleknn.load_model(
            os.path.join(feat_dir, "feature.bin"),
            ndims,
            nr_of_images,
            os.path.join(feat_dir, "id.txt"))
        self.searcher.set_distance(distance)

        tagfile = os.path.join(rootpath, collection, "TextData",
                               "id.userid.%stags.txt" % tpp)
        self.textstore = RecordStore(tagfile)

        # Defaults; overridable via set_nr_neighbors / set_nr_autotags.
        self.nr_neighbors = 1000
        self.nr_newtags = 100

        printStatus(
            INFO, "nr_neighbors=%d, nr_newtags=%d" %
            (self.nr_neighbors, self.nr_newtags))
Esempio n. 2
0
	def printSummary(self):
		# Print an end-of-run summary. For the 'status'/'st' command this
		# lists repositories that were not cloned, have unpushed commits,
		# or have uncommitted changes; any collected warnings are always
		# printed at the end. (Python 2 print-statement syntax.)
		print '\n'
		if self.args.command in ['status', 'st']:
			terminal.blue('---SUMMARY---')
			print ''
			# Repositories that could not be cloned.
			if len(Dolly.not_cloned) > 0:
				terminal.warning('The following repositories were not cloned')
				for repo in Dolly.not_cloned:
					util.printStatus(repo, False)
			print ''
			# Repositories with local commits not yet pushed.
			if len(Dolly.unpushed) > 0:
				terminal.warning('The following repositories contain unpushed commits')
				for repo in Dolly.unpushed:
					util.printStatus(repo, False)
			else:
				terminal.ok('No unpushed commits')
			print ''
			# Repositories with uncommitted working-tree changes.
			if len(Dolly.changes) > 0:
				terminal.warning('The following repositories contain uncomitted changes')
				for change in Dolly.changes:
					print '[{0}] {1}'.format(change['repo']['name'], change['change'])
			else:
				terminal.ok('No uncomitted changes')
			print ''
		# Warnings are reported regardless of the command that ran.
		if len(Dolly.warnings) > 0:
			terminal.warning('Some errors occured')
			for warning in Dolly.warnings:
				print warning
Esempio n. 3
0
def select_and_move():
    """Pick the oldest trimmed mp3s and move them to the listening folder.

    Scans cfg.trimCastFolder for .mp3 files, orders them by creation
    time, and moves at most cfg.maxFilesToCopy of the oldest ones into
    cfg.listeningFolder, prefixing each name with its extracted date.
    """
    u.printStep( 'Find files to copy' )

    # Collect (ctime, name) pairs. The old code keyed a dict on ctime,
    # which silently dropped files sharing a creation time AND iterated
    # the keys unsorted, so "oldest first" was never actually honoured.
    candidates = []
    for name in os.listdir(cfg.trimCastFolder):
        # filter out files that aren't mp3s
        if not name.endswith('.mp3'):
            continue
        statinfo = os.stat(path.join(cfg.trimCastFolder, name))
        candidates.append((statinfo.st_ctime, name))

    # Oldest files first; cap at cfg.maxFilesToCopy. (The original also
    # used `n is 0`, an identity test that is unreliable for ints.)
    candidates.sort()
    desiredFiles = [name for _, name in candidates[:cfg.maxFilesToCopy]]

    u.printStatus('Found %d files to copy' % len(desiredFiles))

    # Move each selected file to the Listening folder, date-prefixed.
    for f in desiredFiles:
        src = path.join(cfg.trimCastFolder, f)
        dst = path.join(cfg.listeningFolder, namemanip.find_date(f)+'_'+f)
        moveFile(src, dst)
Esempio n. 4
0
def GC(seq):
    """Calculate GC ratio in given sequence (case-insensitive).

    Args:
        seq: nucleotide sequence string.

    Returns:
        float in [0, 1], or None (with a warning) for an empty sequence.
    """
    # Check that sequence is non-empty
    seqlen = len(seq)
    if seqlen == 0:
        # BUG FIX: the warning named the wrong function (GCinterval).
        util.printStatus("WARNING in GC(): sequence is an "
                         "empty string")
        return None

    seq = seq.lower()
    # float() keeps this a true division even under Python 2 (where
    # int/int would truncate the ratio to 0).
    return float(seq.count("g") + seq.count("c")) / seqlen
Esempio n. 5
0
def copy_to_ipod():
    ###
    # Copy files from Listening folder to iPod
    u.printStep('Begin copy')

    # reserve some space
    # Copy one file to the "magic" buffer location first; if even that
    # fails with IOError the device is already full, so re-raise and
    # abort the whole copy. (Python 2 `except E, ex` syntax.)
    desiredFiles = os.listdir(cfg.listeningFolder)
    u.printStatus( 'Making buffer space' )
    try:
        copyFile(path.join(cfg.listeningFolder, desiredFiles[0]), cfg.freeSpaceMagic)
    except IOError, ex:
        u.printWarning("No space on device. Cannot copy any files (%s)" % ex)
        raise ex
Esempio n. 6
0
    def __init__(self, collection, feature, distance, tpp='lemm', rootpath=ROOT_PATH):
        """Load the k-NN feature searcher and tag records for a collection.

        Reads shape.txt for (nr_of_images, ndims), loads the binary
        feature matrix through simpleknn, and parses the collection's
        id/userid/tags file into a RecordStore.
        """
        feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
        id_file = os.path.join(feat_dir, "id.txt")
        feat_file = os.path.join(feat_dir, "feature.bin")
        # shape.txt holds two ints: number of images and feature dimension.
        nr_of_images, ndims = map(int, open(os.path.join(feat_dir,'shape.txt')).readline().split())

        self.searcher = simpleknn.load_model(feat_file, ndims, nr_of_images, id_file)
        self.searcher.set_distance(distance)

        tagfile = os.path.join(rootpath, collection, "TextData", "id.userid.%stags.txt" % tpp)
        self.textstore = RecordStore(tagfile)

        # Defaults; overridable via set_nr_neighbors / set_nr_autotags.
        self.nr_neighbors = 1000
        self.nr_newtags = 100

        printStatus(INFO, "nr_neighbors=%d, nr_newtags=%d" % (self.nr_neighbors, self.nr_newtags))
Esempio n. 7
0
    def __init__(self, tagfile):
        # Parse tagfile (tab-separated: photoid, userid, tags) into a
        # photoid -> (userid, tags) mapping and a tag-frequency table.
        # (Python 2 print-statement syntax.)
        printStatus('textstore.RecordStore', 'read from %s' % tagfile)
        self.mapping = {}   # photoid -> (userid, lowercased tag string)
        self.tag2freq = {}  # tag -> number of images carrying it

        for line in open(tagfile): #.readlines():
            # NOTE(review): echoes every input line; looks like debug leftovers.
            print line.strip()
            [photoid, userid, tags] = line.strip().split('\t')
            self.mapping[photoid] = (userid, tags.lower())
            # Count each distinct tag at most once per image.
            for tag in set(str.split(tags)):
                self.tag2freq[tag] = self.tag2freq.get(tag,0) + 1

        self.nr_images = len(self.mapping)
        self.nr_tags = len(self.tag2freq)

        print ("-> %d images, %d unique tags" % (self.nr_images, self.nr_tags))
Esempio n. 8
0
	def visit(self, host):
		# Process every repository under *host* concurrently with a pool
		# of 5 workers, then run the host's optional post-update command.
		pool = Pool(5, init_worker)

		def pr(repo):
			# Schedule process_repo asynchronously; returns an AsyncResult.
			return pool.apply_async(process_repo, (self, repo))

		# Pair each repo with its in-flight result so we can report
		# progress per repository while waiting.
		results = zip(host.tree, map(pr, host.tree))

		for r in results:
			repo, result = r
			project.Project.currentProj += 1
			util.printStatus(repo)
			# Workaround to Python issue 8296 where a SIGINT will
			# lock up the process when no wait time is given.
			result.wait(9999999)

		if host.post_update:
			util.executeCommand(host.post_update)
Esempio n. 9
0
	def visit(self, host):
		# Process every repository under *host* concurrently with a pool
		# of 5 workers, then run the host's optional post-update command.
		pool = Pool(5, init_worker)

		def pr(repo):
			# Schedule process_repo asynchronously; returns an AsyncResult.
			return pool.apply_async(process_repo, (self, repo))

		# Pair each repo with its in-flight result so we can report
		# progress per repository while waiting.
		results = zip(host.tree, map(pr, host.tree))

		for r in results:
			repo, result = r
			project.Project.currentProj += 1
			util.printStatus(repo)
			# Workaround to Python issue 8296 where a SIGINT will
			# lock up the process when no wait time is given.
			result.wait(9999999)

		if host.post_update:
			util.executeCommand(host.post_update)
Esempio n. 10
0
def QualToInt(qual, phred=33):
    """Convert a quality string to a list of integer Phred scores.

    Args:
        qual: quality string, one character per base.
        phred: encoding offset; expected to be 33 or 64.

    Returns:
        List of ints (ord(char) - phred), or None (with a warning) for
        an empty string. An unexpected offset only warns; conversion
        still proceeds.
    """
    # Check that quality is non-empty
    qlen = len(qual)
    if qlen == 0:
        util.printStatus("WARNING in QualToInt(): quality is an "
                         "empty string")
        return None

    # Check that quality is either 33 or 64 (idiomatic membership test
    # instead of the double negated comparison).
    if phred not in (33, 64):
        util.printStatus("WARNING in QualToInt(): phred value is not the "
                         "expected 33 or 64")

    # Comprehension replaces the manual append loop.
    return [ord(nt) - phred for nt in qual]
Esempio n. 11
0
def QualToInt_interval(qual, phred=33, interval=10):
    """Convert quality characters to integers, averaged per *interval*.

    Returns a list with one mean Phred score per window of *interval*
    characters, or None (with a warning) for an empty string.
    """
    # Guard against a non-positive interval; fall back to the default.
    if interval < 1:
        util.printStatus("WARNING in QualToInt_interval(): cannot use an "
                         "interval less than 1. Defaulting to interval = 10")
        interval = 10

    qlen = len(qual)
    # An empty quality string yields None rather than an empty list.
    if qlen == 0:
        util.printStatus("WARNING in QualToInt_interval(): quality is an "
                         "empty string")
        return None

    # Warn (but continue) on an unexpected Phred offset.
    if phred not in (33, 64):
        util.printStatus("WARNING in QualToInt_interval(): phred value is not "
                         "the expected 33 or 64")

    # Average the decoded scores over each window of *interval* characters.
    return [
        np.mean([ord(ch) - phred for ch in qual[pos: pos + interval]])
        for pos in range(0, qlen, interval)
    ]
Esempio n. 12
0
def GC_interval(seq, interval=10):
    """Calculate GC ratio in given sequence per *interval*-base window.

    Args:
        seq: nucleotide sequence string.
        interval: window width; values < 1 fall back to 10 with a warning.

    Returns:
        List of per-window GC ratios, or None (with a warning) for an
        empty sequence. The final window may be shorter than *interval*.
    """
    # Check that interval is positive
    if interval < 1:
        util.printStatus("WARNING in GC_interval(): cannot use an interval "
                         "less than 1. Defaulting to interval = 10")
        interval = 10

    # Check that sequence is non-empty
    seqlen = len(seq)
    if seqlen == 0:
        util.printStatus("WARNING in GC_interval(): sequence is an "
                         "empty string")
        return None

    seq = seq.lower()
    gcs = []
    for i in range(0, seqlen, interval):
        currSeq = seq[i: i + interval]
        # float() keeps this a true division even under Python 2 (where
        # int/int would truncate every ratio to 0).
        gcs.append(float(currSeq.count("g") + currSeq.count("c")) / len(currSeq))

    return gcs
Esempio n. 13
0
	def visit(self, host):
		"""Report the status of every repository configured for *host*."""
		for current_repo in host.tree:
			project.Project.currentProj += 1
			util.printStatus(current_repo)
			self.status(current_repo)
Esempio n. 14
0
def process(options, trainCollection, feature, testCollection):
    """Estimate tag relevance for every test image with a k-NN learner.

    Reads the job's share of test image ids and features, runs
    TagrelLearner block by block, and writes "<image> <tag> <vote> ..."
    lines to the result file.

    Returns:
        1 on success, 0 when the result file already exists and
        overwrite is off.
    """
    rootpath = options.rootpath
    tpp = options.tpp
    distance = options.distance
    k = options.k
    r = options.r
    donefile = options.donefile
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    # BUG FIX: the original assigned testset only when options.testset
    # was None, leaving it unbound (NameError) whenever a test set WAS
    # supplied on the command line.
    testset = testCollection if options.testset is None else options.testset

    test_tag_file = os.path.join(rootpath, testCollection, "TextData",
                                 "id.userid.%stags.txt" % tpp)
    try:
        testStore = RecordStore(test_tag_file)
        resultName = "tagrel"
    except Exception:
        # Narrowed from a bare `except:`. Missing tag data means we fall
        # back to plain auto-tagging instead of tag-relevance ranking.
        testStore = None
        printStatus(
            INFO,
            "Failed to load %s, will do image auto-tagging" % test_tag_file)
        resultName = "autotagging"

    nnName = distance + "knn"
    resultfile = os.path.join(rootpath, testCollection, resultName, testset,
                              trainCollection,
                              "%s,%s,%d,%s" % (feature, nnName, k, tpp),
                              "id.tagvotes.txt")

    # Each parallel job writes its own shard of the result file.
    if numjobs > 1:
        resultfile += ".%d.%d" % (numjobs, job)

    if checkToSkip(resultfile, overwrite):
        return 0

    # Images listed in the donefile are skipped; the last line may be a
    # partial write, so it is ignored.
    if donefile:
        doneset = set([x.split()[0] for x in open(donefile).readlines()[:-1]])
    else:
        doneset = set()
    printStatus(
        INFO, "%d images have been done already, and they will be ignored" %
        len(doneset))

    # Keep only this job's share of the remaining test images.
    test_imset = readImageSet(testCollection, testset, rootpath)
    test_imset = [x for x in test_imset if x not in doneset]
    test_imset = [
        test_imset[i] for i in range(len(test_imset))
        if (i % numjobs + 1) == job
    ]
    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData',
                                 feature)
    test_feat_file = BigFile(test_feat_dir)

    learner = TagrelLearner(trainCollection,
                            feature,
                            distance,
                            tpp=tpp,
                            rootpath=rootpath)
    learner.set_nr_neighbors(k)
    learner.set_nr_autotags(r)

    printStatus(
        INFO, "working on %d-%d, %d test images -> %s" %
        (numjobs, job, len(test_imset), resultfile))

    done = 0
    makedirsforfile(resultfile)

    read_time = 0
    test_time = 0
    start = 0

    fw = open(resultfile, "w")
    try:
        while start < len(test_imset):
            end = min(len(test_imset), start + blocksize)
            printStatus(INFO,
                        'processing images from %d to %d' % (start, end - 1))

            s_time = time.time()
            renamed, vectors = test_feat_file.read(test_imset[start:end])
            read_time += time.time() - s_time
            # some images may have no visual features available, so
            # len(renamed) can be smaller than the requested block
            nr_images = len(renamed)

            s_time = time.time()
            output = [None] * nr_images
            for i in range(nr_images):
                if testStore:
                    (qry_userid, qry_tags) = testStore.lookup(renamed[i])
                else:
                    qry_userid = None
                    qry_tags = None

                tagvotes = learner.estimate(vectors[i], qry_tags, qry_userid)
                output[i] = '%s %s\n' % (renamed[i], " ".join([
                    "%s %s" % (tag, niceNumber(vote, 8))
                    for (tag, vote) in tagvotes
                ]))
            test_time += time.time() - s_time
            start = end
            fw.write(''.join(output))
            fw.flush()

            done += len(output)
    finally:
        # Close the result file even if estimation fails mid-run.
        fw.close()

    printStatus(
        INFO, "%d done. read time %g seconds, test_time %g seconds" %
        (done, read_time, test_time))
    return 1
Esempio n. 15
0
 def set_nr_neighbors(self, k):
     """Set the number of nearest neighbours consulted per query image."""
     printStatus(INFO, "setting nr_neighbors to %d" % k)
     self.nr_neighbors = k
Esempio n. 16
0
 u.printStep('Begin copy')

 # reserve some space
 # Copy one file to the "magic" buffer location first; if even that
 # fails with IOError the device is already full, so abort the copy.
 # (Python 2 `except E, ex` syntax.)
 desiredFiles = os.listdir(cfg.listeningFolder)
 u.printStatus( 'Making buffer space' )
 try:
     copyFile(path.join(cfg.listeningFolder, desiredFiles[0]), cfg.freeSpaceMagic)
 except IOError, ex:
     u.printWarning("No space on device. Cannot copy any files (%s)" % ex)
     raise ex
 except KeyboardInterrupt, ex:
     u.printWarning('Interrupt caught, skipping copying step')
     return      ####### Early Return

 # Move each queued file onto the iPod; a full device only skips the
 # remaining files (they stay queued for the next sync).
 for f in desiredFiles:
     u.printStatus( 'Copying: %s' % f )
     src = path.join(cfg.listeningFolder, f)
     dst = path.join(cfg.iPodCastFolder, f)
     try:
         # move out of listening folder to ipod
         # hopefully, the move will only occur if there's space
         moveFile(src, dst)
     except IOError, ex:
         u.printWarning( "Warning: Out of space on device (%s)" % ex )
         # failure means it will stay in listening folder for the next iPod sync
     except KeyboardInterrupt, ex:
         u.printWarning('Interrupt caught, not copying any more files')

 # free up junk space
 # Delete the buffer file so its reserved space becomes usable again.
 u.printStatus( 'Clearing buffer space' )
 removeFile(cfg.freeSpaceMagic)
Esempio n. 17
0
 def set_nr_autotags(self, k):
     """Set how many tags are produced when auto-tagging an image."""
     printStatus(INFO, "setting nr_autotags to %d" % k)
     self.nr_newtags = k
Esempio n. 18
0
 def set_nr_neighbors(self, k):
     # Number of nearest neighbours consulted per query image.
     self.nr_neighbors = k
     printStatus(INFO, "setting nr_neighbors to %d" % k)
Esempio n. 19
0
 def set_nr_autotags(self, k):
     # Number of tags produced when auto-tagging an image.
     self.nr_newtags = k
     printStatus(INFO, "setting nr_autotags to %d" % k)
Esempio n. 20
0
 def visit(self, host):
     # Walk every repository configured for this host and report status.
     for repo in host.tree:
         project.Project.currentProj += 1
         util.printStatus(repo)
         self.status(repo)
Esempio n. 21
0
                    help="File is in FASTA format")
parser.add_argument("--gzip", action="store_true",
                    help="File is compressed with GZIP")
parser.add_argument("--gc", action="store_true",
                    help="Calculate GC")
parser.add_argument("-v", "--verbose", action="store_true",
                    help="Verbose output")

args = parser.parse_args()

seqFile = args.fastq
# Normalise the output directory so later path concatenation is safe.
outdir = args.outdir if args.outdir.endswith("/") else args.outdir + "/"

# Check file exists
if not os.path.isfile(seqFile):
    # BUG FIX: the original referenced an undefined name `fn` here,
    # which raised NameError instead of printing the message.
    util.printStatus("Sequence input file '" + seqFile + "' does not exist.")
    util.exitScript()

# Check output directory exists
if not os.path.isdir(outdir):
    util.printStatus("Output directory '" + outdir + "' does not exist.")
    util.exitScript()

# Give warning on file extension
sf_lc = seqFile.lower()
if args.gzip and not sf_lc.endswith(".gz"):
    util.printStatus("WARNING: file does not end in '.gz' - may not "
                     "be a GZIP file")
elif args.fasta and (not sf_lc.endswith(".fasta") and
                     not sf_lc.endswith(".fna") and
                     not sf_lc.endswith(".fa")):
Esempio n. 22
0
def process(options, trainCollection, feature, testCollection):
    """Estimate tag relevance for every test image with a k-NN learner.

    Reads the job's share of test image ids and features, runs
    TagrelLearner block by block, and writes "<image> <tag> <vote> ..."
    lines to the result file.

    Returns:
        1 on success, 0 when the result file already exists and
        overwrite is off.
    """
    rootpath = options.rootpath
    tpp = options.tpp
    distance = options.distance
    k = options.k
    r = options.r
    donefile = options.donefile
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    # BUG FIX: the original assigned testset only when options.testset
    # was None, leaving it unbound (NameError) whenever a test set WAS
    # supplied on the command line.
    testset = testCollection if options.testset is None else options.testset

    test_tag_file = os.path.join(rootpath, testCollection, "TextData", "id.userid.%stags.txt"%tpp)
    try:
        testStore = RecordStore(test_tag_file)
        resultName = "tagrel"
    except Exception:
        # Narrowed from a bare `except:`. Missing tag data means we fall
        # back to plain auto-tagging instead of tag-relevance ranking.
        testStore = None
        printStatus(INFO, "Failed to load %s, will do image auto-tagging" % test_tag_file)
        resultName = "autotagging"

    nnName = distance + "knn"
    resultfile = os.path.join(rootpath, testCollection,resultName,testset,trainCollection,"%s,%s,%d,%s" % (feature,nnName,k,tpp), "id.tagvotes.txt")

    # Each parallel job writes its own shard of the result file.
    if numjobs>1:
        resultfile += ".%d.%d" % (numjobs,job)

    if checkToSkip(resultfile, overwrite):
        return 0

    # Images listed in the donefile are skipped; the last line may be a
    # partial write, so it is ignored.
    if donefile:
        doneset = set([x.split()[0] for x in open(donefile).readlines()[:-1]])
    else:
        doneset = set()
    printStatus(INFO, "%d images have been done already, and they will be ignored" % len(doneset))

    # Keep only this job's share of the remaining test images.
    test_imset = readImageSet(testCollection, testset, rootpath)
    test_imset = [x for x in test_imset if x not in doneset]
    test_imset = [test_imset[i] for i in range(len(test_imset)) if (i%numjobs+1) == job]
    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', feature)
    test_feat_file = BigFile(test_feat_dir)

    learner = TagrelLearner(trainCollection, feature, distance, tpp=tpp, rootpath=rootpath)
    learner.set_nr_neighbors(k)
    learner.set_nr_autotags(r)

    printStatus(INFO, "working on %d-%d, %d test images -> %s" % (numjobs,job,len(test_imset),resultfile))

    done = 0
    makedirsforfile(resultfile)

    read_time = 0
    test_time = 0
    start = 0

    fw = open(resultfile, "w")
    try:
        while start < len(test_imset):
            end = min(len(test_imset), start + blocksize)
            printStatus(INFO, 'processing images from %d to %d' % (start, end-1))

            s_time = time.time()
            renamed, vectors = test_feat_file.read(test_imset[start:end])
            read_time += time.time() - s_time
            # some images may have no visual features available, so
            # len(renamed) can be smaller than the requested block
            nr_images = len(renamed)

            s_time = time.time()
            output = [None] * nr_images
            for i in range(nr_images):
                if testStore:
                    (qry_userid, qry_tags) = testStore.lookup(renamed[i])
                else:
                    qry_userid = None
                    qry_tags = None

                tagvotes = learner.estimate(vectors[i], qry_tags, qry_userid)
                output[i] = '%s %s\n' % (renamed[i], " ".join(["%s %s" % (tag, niceNumber(vote,8)) for (tag,vote) in tagvotes]))
            test_time += time.time() - s_time
            start = end
            fw.write(''.join(output))
            fw.flush()

            done += len(output)
    finally:
        # Close the result file even if estimation fails mid-run.
        fw.close()

    printStatus(INFO, "%d done. read time %g seconds, test_time %g seconds" % (done, read_time, test_time))
    return 1