def processReportFile(reportFile, statusLabel, keyIndex, norelease, species):
    """Count report rows per key label and per status.

    Reads the tab-separated file ``reportFile`` and tallies its rows into a
    matrix keyed by the value found in column ``keyIndex`` and by the row's
    status (column 8).

    A row is skipped when:
      * the line is blank, or starts with 'Project' (presumably the header);
      * its status is 'revoked' or 'replaced';
      * ``norelease`` is 1 and its status is 'released';
      * ``species`` is not 'all' and the assembly (column 9) does not match:
        'human' accepts assemblies starting with 'hg', 'mouse' accepts those
        starting with 'mm', any other value must equal the assembly exactly.

    When ``keyIndex`` is 5 the key is first passed through
    encodeReportLib.parseFreezeLabel and the output rows are ordered by
    encodeReportLib.orderFreezeDateLabels; otherwise rows are sorted by key.

    Args:
        reportFile: path of the tab-separated report to read.
        statusLabel: iterable of status names; one output column per entry.
        keyIndex: index of the column used as the row key.
        norelease: when equal to 1, rows with status 'released' are ignored.
        species: 'all', 'human', 'mouse', or an exact assembly name.

    Returns:
        A list of rows ``[keyLabel, count, count, ...]`` with one count per
        entry of ``statusLabel``, in the same order as ``statusLabel``.

    Raises:
        KeyError: if a counted row has a status that is not in ``statusLabel``.
        IndexError: if a non-blank data row has too few columns.
    """
    counts = {}  # keyLabel -> {status: number of rows}

    # 'with' guarantees the file is closed even if a row raises.
    with open(reportFile, "r") as f:
        for line in f:
            line = line.rstrip()
            # Blank lines carry no columns and would fail on indexing below.
            if not line or line.startswith('Project'):
                continue

            splitArray = line.split('\t')
            keyLabel = splitArray[keyIndex]
            status = splitArray[8]
            assembly = splitArray[9]

            if status in ('revoked', 'replaced'):
                continue
            if norelease == 1 and status == 'released':
                continue

            if keyIndex == 5:
                keyLabel = encodeReportLib.parseFreezeLabel(keyLabel)

            if species != 'all':
                isHuman = species == 'human' and assembly.startswith('hg')
                isMouse = species == 'mouse' and assembly.startswith('mm')
                if not (isHuman or isMouse or species == assembly):
                    continue

            if keyLabel not in counts:
                # Start every requested status at zero so each output row
                # has a value for every column.
                counts[keyLabel] = dict.fromkeys(statusLabel, 0)
            counts[keyLabel][status] += 1

    if keyIndex == 5:
        sortKey = encodeReportLib.orderFreezeDateLabels(list(counts))
    else:
        sortKey = sorted(counts)

    # Populate dataArray with the contents of the matrix
    dataArray = []
    for labKey in sortKey:
        row = [labKey]
        row.extend(counts[labKey][statusKey] for statusKey in statusLabel)
        dataArray.append(row)
    return dataArray
def processReportFile(reportFile, keyIndex):
    """Build a histogram of submit-to-release time, in weeks, per key label.

    Reads the tab-separated file ``reportFile``. For every row whose status
    (column 8) is 'released' and whose start (column 6) and end (column 7)
    dates both convert to ints via encodeReportLib.convertDate, the number of
    days between the two dates is rounded up to whole weeks and counted under
    the row's key (column ``keyIndex``). A zero-day difference is treated as
    one day, so it lands in week 1.

    Blank lines and lines starting with 'Project' (presumably the header) are
    skipped. When ``keyIndex`` is 8, rows with status 'revoked' or 'replaced'
    are skipped before any date conversion. Released rows with an invalid date
    are reported on stderr and not counted.

    When ``keyIndex`` is 5 the key is passed through
    encodeReportLib.parseFreezeLabel and labels are ordered by
    encodeReportLib.orderFreezeDateLabels; otherwise labels are ordered by
    descending number of counted rows.

    Args:
        reportFile: path of the tab-separated report to read.
        keyIndex: index of the column used as the label.

    Returns:
        A tuple ``(dataArray, labels)``. ``dataArray`` is a list of rows
        ``[weeks, count, count, ...]`` sorted by week, with one count per
        entry of ``labels``; every week from 1 up to the largest observed
        week is present, and a label with no entry for a week is reported as
        0. If no row was counted, ``([], [])`` is returned.

    Raises:
        IndexError: if a non-blank data row has too few columns.
    """
    weekCounts = {}   # weeks -> {label: number of released rows}
    labelCounts = {}  # label -> total number of counted rows

    # 'with' guarantees the file is closed even if a row raises.
    with open(reportFile, "r") as f:
        for line in f:
            line = line.rstrip()
            # Blank lines carry no columns and would fail on indexing below.
            if not line or line.startswith('Project'):
                continue

            splitArray = line.split('\t')
            keyLabel = splitArray[keyIndex]
            startDate = splitArray[6]
            endDate = splitArray[7]
            status = splitArray[8]

            if keyIndex == 8 and status in ('revoked', 'replaced'):
                continue

            if keyIndex == 5:
                keyLabel = encodeReportLib.parseFreezeLabel(keyLabel)

            # Convert dates into ints
            submitDate = encodeReportLib.convertDate(startDate)
            releaseDate = encodeReportLib.convertDate(endDate)

            if status != 'released':
                continue

            if not isinstance(submitDate, int) or not isinstance(releaseDate, int):
                sys.stderr.write("Error: Invalid date: %s\n" % line)
                continue

            labelCounts[keyLabel] = labelCounts.get(keyLabel, 0) + 1

            deltaTime = (encodeReportLib.dateIntToObj(releaseDate)
                         - encodeReportLib.dateIntToObj(submitDate))
            # Same-day release counts as one day so it falls in week 1.
            days = deltaTime.days or 1
            # Ceiling division: only exact multiples of 7 are not bumped up.
            weeks = (days + 6) // 7

            perLabel = weekCounts.setdefault(weeks, {})
            perLabel[keyLabel] = perLabel.get(keyLabel, 0) + 1

    # Nothing was counted: max() below would raise on an empty mapping.
    if not weekCounts:
        return [], []

    if keyIndex == 5:
        labels = encodeReportLib.orderFreezeDateLabels(list(labelCounts))
    else:
        ranked = sorted(labelCounts.items(), key=operator.itemgetter(1), reverse=True)
        labels = [label for label, _ in ranked]

    # Make sure every week from 1 to the maximum has a row, even if empty.
    maxWeek = max(weekCounts)
    for week in range(1, maxWeek + 1):
        weekCounts.setdefault(week, {})

    # Populate dataArray with the contents of the matrix
    dataArray = []
    for week in sorted(weekCounts):
        row = [week]
        row.extend(weekCounts[week].get(label, 0) for label in labels)
        dataArray.append(row)
    return dataArray, labels