Beispiel #1
0
	def traverse(self):
		"""
		Read every file in self.inputFnameLs and collect its value columns, keyed by the key columns.

		For each existing input file:
			1. open it via utils.openGzipFile, guess its delimiter and wrap it in a MatrixFile;
			2. pass its first row to self.handleNewHeader(), which is handed newHeader and a fresh,
				per-file valueColumnLs to fill in;
			3. pass every remaining row (every row, first one included, if self.noHeader is set)
				to self.handleValueColumns(), which is handed key2dataLs and visitedKeySet;
			4. append one '' per value column of this file to every key of key2dataLs that is
				not in visitedKeySet, i.e. pad the keys this file did not contribute to.

		A file that does not exist is skipped, or terminates the program with exit code 3
		if self.exitNonZeroIfAnyInputFileInexistent is set. A file that can not be opened/parsed,
		or a row that can not be handled, is reported on stderr (with traceback) and skipped.

		Returns a PassingData with:
			key2dataLs: the dictionary filled by self.handleValueColumns() plus the padding above
			delimiter: the delimiter of the last file whose delimiter could be determined (None if none)
			header: newHeader, or None if self.noHeader is set

		self.noHeader:	#2012.8.10
		2012.1.9
		"""
		import traceback

		newHeader = []
		key2dataLs = {}	#key is the keyColumn, dataLs corresponds to the sum of each column from valueColumnLs
		delimiter = None
		noOfDataColumnsFromPriorFiles = 0
		for inputFname in self.inputFnameLs:
			if not os.path.isfile(inputFname):
				if self.exitNonZeroIfAnyInputFileInexistent:
					sys.exit(3)
				continue
			inputFile = None
			reader = None
			try:
				inputFile = utils.openGzipFile(inputFname)
				delimiter = figureOutDelimiter(inputFile)
				reader = MatrixFile(inputFile=inputFile, delimiter=delimiter)
			except Exception:
				#"Exception", not a bare except: Ctrl-C and sys.exit() must not be swallowed
				sys.stderr.write('Except type: %s\n'%repr(sys.exc_info()))
				traceback.print_exc()
			if reader is None:
				#opening failed and has been reported above. this file contributes no columns.
				if inputFile is not None:
					inputFile.close()
				continue

			valueColumnLs = []
			try:
				header = reader.next()
				self.handleNewHeader(header, newHeader, self.keyColumnLs, valueColumnLs, keyColumnSet=self.keyColumnSet)
				if self.noHeader:	#2012.8.10
					#the first row is data, not a header: rewind so that it is read again as a row
					inputFile.seek(0)
					reader = MatrixFile(inputFile=inputFile, delimiter=delimiter)
			except Exception:	#in case something wrong (i.e. file is empty)
				sys.stderr.write('Except type: %s\n'%repr(sys.exc_info()))
				traceback.print_exc()

			#an empty valueColumnLs means the header could not be handled, nothing to collect
			if valueColumnLs:
				visitedKeySet = set()
				for row in reader:
					try:
						self.handleValueColumns(row, key2dataLs=key2dataLs, keyColumnLs=self.keyColumnLs,
								valueColumnLs=valueColumnLs, noOfDataColumnsFromPriorFiles=noOfDataColumnsFromPriorFiles,
								visitedKeySet=visitedKeySet)
					except Exception:	#a bad row is reported and skipped, the rest of the file is still read
						sys.stderr.write('Ignore this row: %s.\n'%repr(row))
						sys.stderr.write('Except type: %s\n'%repr(sys.exc_info()))
						traceback.print_exc()
				#append empty data to keys who are not present in this current "reader" file
				unvisitedKeySet = set(key2dataLs) - visitedKeySet
				for key in unvisitedKeySet:
					key2dataLs[key].extend([''] * len(valueColumnLs))
			del reader
			#release the file handle, otherwise one descriptor stays open per input file
			inputFile.close()
			noOfDataColumnsFromPriorFiles += len(valueColumnLs)
		if self.noHeader:	#2012.8.10
			newHeader = None
		returnData = PassingData(key2dataLs=key2dataLs, delimiter=delimiter, header=newHeader)
		return returnData
Beispiel #2
0
    def traverse(self):
        """
        Read every file in self.inputFnameLs and accumulate its rows into one dictionary.

        For each existing input file:
            1. open it via utils.openGzipFile, guess its delimiter and wrap it in a MatrixFile;
            2. pass its first row to self.handleNewHeader(), together with newHeader,
               self.keyColumnLs and self.valueColumnLs;
            3. pass every remaining row (every row, first one included, if self.noHeader
               is set) to self.handleValueColumns(), which is handed key2dataLs.

        A file that does not exist is skipped, or terminates the program with exit code 3
        if self.exitNonZeroIfAnyInputFileInexistent is set. A file that can not be opened,
        a header or a row that can not be handled, is reported on stderr (with traceback);
        a failed header does not stop the rows of that file from being processed.

        Returns a PassingData with:
            key2dataLs: the dictionary filled by self.handleValueColumns()
            delimiter: the delimiter of the last file whose delimiter could be determined
                (None if none)
            header: newHeader, or None if self.noHeader is set

        2012.8.10 added self.noHeader
        2012.1.9
        """
        import traceback

        newHeader = []
        key2dataLs = {}  #key is the keyColumn, dataLs corresponds to the sum of each column from valueColumnLs
        delimiter = None
        for inputFname in self.inputFnameLs:
            if not os.path.isfile(inputFname):
                if self.exitNonZeroIfAnyInputFileInexistent:
                    sys.exit(3)
                continue
            inputFile = None
            reader = None
            try:
                inputFile = utils.openGzipFile(inputFname)
                delimiter = figureOutDelimiter(inputFile)
                reader = MatrixFile(inputFile=inputFile, delimiter=delimiter)
            except Exception:
                # "Exception", not a bare except: Ctrl-C and sys.exit() must not be swallowed
                sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
                traceback.print_exc()
            if reader is None:
                # opening failed and has been reported above; nothing to read from this file
                if inputFile is not None:
                    inputFile.close()
                continue

            try:
                header = reader.next()
                self.handleNewHeader(header,
                                     newHeader,
                                     self.keyColumnLs,
                                     self.valueColumnLs,
                                     keyColumnSet=self.keyColumnSet)
                if self.noHeader:  #2012.8.10
                    # the first row is data, not a header: rewind so that it is read again as a row
                    inputFile.seek(0)
                    reader = MatrixFile(inputFile=inputFile,
                                        delimiter=delimiter)
            except Exception:  #in case something wrong (i.e. file is empty)
                sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
                traceback.print_exc()

            for row in reader:
                try:
                    self.handleValueColumns(
                        row,
                        key2dataLs=key2dataLs,
                        keyColumnLs=self.keyColumnLs,
                        valueColumnLs=self.valueColumnLs)
                except Exception:  # a bad row is reported and skipped, the rest of the file is still read
                    sys.stderr.write('Ignore this row: %s.\n' % repr(row))
                    sys.stderr.write('Except type: %s\n' %
                                     repr(sys.exc_info()))
                    traceback.print_exc()
            del reader
            # release the file handle, otherwise one descriptor stays open per input file
            inputFile.close()
        if self.noHeader:  #2012.8.10
            newHeader = None
        returnData = PassingData(key2dataLs=key2dataLs,
                                 delimiter=delimiter,
                                 header=newHeader)
        return returnData