예제 #1
0
def processReportFile(reportFile, statusLabel, keyIndex, norelease, species):
  """Count report rows per key label and per status.

  Reads the tab-separated file at `reportFile` and tallies its rows, grouped
  by the value found in column `keyIndex` and by the status found in column 8.
  Blank lines and lines starting with 'Project' (presumably the header row)
  are ignored, as are rows whose status is 'revoked' or 'replaced'.

  Args:
    reportFile: path of the tab-separated report to read.
    statusLabel: sequence of status names; it defines which counters exist
      for every label and the order of the count columns in the result.
    keyIndex: zero-based column whose value is used as the row label. When
      it is 5 the value is first passed through
      encodeReportLib.parseFreezeLabel and the final labels are ordered by
      encodeReportLib.orderFreezeDateLabels; otherwise labels are sorted.
    norelease: when equal to 1, rows whose status is 'released' are skipped.
    species: 'all' keeps every row; 'human' keeps rows whose assembly
      (column 9) starts with 'hg'; 'mouse' keeps those starting with 'mm';
      any other value must equal the assembly exactly.

  Returns:
    A list of rows, one per label, each of the form
    [label, count for statusLabel[0], count for statusLabel[1], ...].

  Raises:
    IndexError: if a non-blank data row has fewer columns than required.
    KeyError: if a counted row has a status that is not in `statusLabel`.
  """
  counts = {}

  # 'with' guarantees the file is closed even when a malformed row raises.
  with open(reportFile, "r") as f:
    for line in f:
      line = line.rstrip()
      # Blank lines carry no columns at all; skip them instead of failing
      # on the column lookups below.
      if not line or line.startswith('Project'):
        continue

      fields = line.split('\t')
      keyLabel = fields[keyIndex]
      status = fields[8]
      assembly = fields[9]

      if status in ('revoked', 'replaced'):
        continue
      if norelease == 1 and status == 'released':
        continue
      if keyIndex == 5:
        keyLabel = encodeReportLib.parseFreezeLabel(keyLabel)

      if species != 'all':
        wanted = ((species == 'human' and assembly.startswith('hg')) or
                  (species == 'mouse' and assembly.startswith('mm')) or
                  species == assembly)
        if not wanted:
          continue

      if keyLabel not in counts:
        # Every label starts with a zeroed counter for each known status.
        counts[keyLabel] = dict.fromkeys(statusLabel, 0)
      counts[keyLabel][status] += 1

  if keyIndex == 5:
    orderedLabels = encodeReportLib.orderFreezeDateLabels(list(counts))
  else:
    orderedLabels = sorted(counts)

  # One output row per label: the label followed by its per-status counts.
  return [[label] + [counts[label][name] for name in statusLabel]
          for label in orderedLabels]
예제 #2
0
def processReportFile(reportFile, keyIndex):
  """Build a weeks-to-release histogram of 'released' rows, per key label.

  Reads the tab-separated file at `reportFile`. For every row whose status
  (column 8) is 'released' and whose start date (column 6) and end date
  (column 7) both convert to ints via encodeReportLib.convertDate, the
  number of weeks between the two dates is computed (rounded up, with a
  same-day span counted as one day) and tallied under the row's label.
  Blank lines and lines starting with 'Project' (presumably the header row)
  are ignored. Released rows with an unconvertible date are reported on
  stderr and skipped.

  Args:
    reportFile: path of the tab-separated report to read.
    keyIndex: zero-based column whose value is used as the label. When it
      is 5 the value is first passed through
      encodeReportLib.parseFreezeLabel and labels are ordered by
      encodeReportLib.orderFreezeDateLabels; otherwise labels are ordered
      by descending number of counted rows. When it is 8, rows whose status
      is 'revoked' or 'replaced' are dropped up front.

  Returns:
    A tuple (dataArray, labels). `labels` is the ordered list of labels and
    `dataArray` holds one row per week, [week, count for labels[0], ...],
    covering every week from 1 up to the largest week seen (weeks with no
    rows are zero-filled). When no row was counted, `dataArray` is empty.

  Raises:
    IndexError: if a non-blank data row has fewer columns than required.
  """
  weekCounts = {}   # week number -> {label: number of released rows}
  labelTotals = {}  # label -> total number of released rows counted

  # 'with' guarantees the file is closed even when a malformed row raises.
  with open(reportFile, "r") as f:
    for line in f:
      line = line.rstrip()
      # Blank lines carry no columns at all; skip them instead of failing
      # on the column lookups below.
      if not line or line.startswith('Project'):
        continue

      fields = line.split('\t')
      keyLabel = fields[keyIndex]
      startDate = fields[6]
      endDate = fields[7]
      status = fields[8]

      if keyIndex == 8 and status in ('revoked', 'replaced'):
        continue

      if keyIndex == 5:
        keyLabel = encodeReportLib.parseFreezeLabel(keyLabel)

      # Convert dates into ints
      submitDate = encodeReportLib.convertDate(startDate)
      releaseDate = encodeReportLib.convertDate(endDate)

      if status != 'released':
        continue
      if not isinstance(submitDate, int) or not isinstance(releaseDate, int):
        sys.stderr.write("Error: Invalid date: %s\n" % line)
        continue

      labelTotals[keyLabel] = labelTotals.get(keyLabel, 0) + 1

      # NOTE(review): assumes dateIntToObj returns date-like objects whose
      # difference exposes `.days` -- confirm against encodeReportLib.
      elapsed = (encodeReportLib.dateIntToObj(releaseDate) -
                 encodeReportLib.dateIntToObj(submitDate))
      days = elapsed.days or 1  # a same-day release still counts as one day
      # Round up to whole weeks: only exact multiples of 7 need no bump.
      weeks, extraDays = divmod(days, 7)
      if extraDays:
        weeks += 1

      perLabel = weekCounts.setdefault(weeks, {})
      perLabel[keyLabel] = perLabel.get(keyLabel, 0) + 1

  if keyIndex == 5:
    labels = encodeReportLib.orderFreezeDateLabels(list(labelTotals))
  else:
    # Most frequent label first; ties keep the dict's iteration order.
    labels = sorted(labelTotals, key=labelTotals.get, reverse=True)

  # Nothing was counted: there is no maximum week to expand to.
  if not weekCounts:
    return [], labels

  # Emit every week from 1 to the largest one seen, plus any other bucket
  # that was actually recorded, defaulting missing cells to zero.
  allWeeks = set(weekCounts)
  allWeeks.update(range(1, max(weekCounts) + 1))

  dataArray = []
  for week in sorted(allWeeks):
    perLabel = weekCounts.get(week, {})
    dataArray.append([week] + [perLabel.get(label, 0) for label in labels])

  return dataArray, labels