def _parseHeaderLine(line):
    """Parse a single header line into a ``(key, value)`` pair.

    The first two characters of ``line`` are dropped (presumably the header
    marker -- confirm against the caller) and the remainder is split on
    ``':'``. The key is lower-cased and the value is stripped of surrounding
    whitespace.

    Handling then depends on the key:

    * ``GENOME_HEADER``: the value is passed through ``urlDecodePhrase``.
    * a key not in ``HEADER_VAR_DICT`` (custom header): the key must not end
      with a space and must be unchanged by ``urlDecodePhrase``; the value is
      returned stripped but otherwise untouched.
    * any other key in ``HEADER_VAR_DICT``: the value is lower-cased and must
      be one of ``HEADER_VAR_DICT[key].allowed``. For ``FILE_TYPE_HEADER``
      the values ``TEXT`` and ``BINARY`` are mapped to ``PRIMARY`` and
      ``PREPROCESSED`` respectively.

    Raises:
        InvalidFormatError: if the line does not contain exactly one ``':'``,
            if a custom header name ends with a space or is changed by URL
            decoding, or if a standard header carries a disallowed value.
    """
    content = line[2:]
    parts = content.split(':')
    if len(parts) != 2:
        raise InvalidFormatError('Header line not understood: ' + repr(content))

    key = parts[0].lower()
    val = parts[1].strip()

    # The genome value is only URL-decoded; it is not checked against a list
    # of allowed values.
    if key == GENOME_HEADER:
        return key, urlDecodePhrase(val)

    # Custom (non-standard) header: only the name is validated.
    if key not in HEADER_VAR_DICT:
        if key.endswith(' '):
            raise InvalidFormatError('Header variable "%s" must not end with space.' % key)

        if urlDecodePhrase(key) != key:
            raise InvalidFormatError('Custom header variable names in GSuite do not support URL '
                                     'escaping. Offending header variable: "{}"'.format(key))
        return key, val

    # Standard header: values are compared case-insensitively.
    val = val.lower()
    allowedVals = HEADER_VAR_DICT[key].allowed
    if val not in allowedVals:
        raise InvalidFormatError('Value "%s" is not allowed for header "%s". Allowed values: %s'
                                 % (val, key, ', '.join(allowedVals)))

    if key == FILE_TYPE_HEADER:
        # TEXT and BINARY are accepted on input but returned as
        # PRIMARY and PREPROCESSED.
        if val == TEXT:
            val = PRIMARY
        elif val == BINARY:
            val = PREPROCESSED

    return key, val
def _parseColumnSpecLine(line):
    """Parse a column specification line into a list of column names.

    The first three characters of ``line`` are dropped (presumably the
    column-line marker -- confirm against the caller), the rest is
    lower-cased and split on tab characters.

    The resulting names are validated in this order:

    1. no name may occur more than once;
    2. the first name must be non-empty and equal to ``URI_COL``;
    3. no name may be empty or whitespace-only;
    4. names found in ``OPTIONAL_STD_COL_NAMES`` must appear in the same
       relative order as in that sequence and before any other
       (non-standard) column;
    5. non-standard names must be unchanged by ``urlDecodePhrase``.

    Returns:
        The list of lower-cased column names, in the order given.

    Raises:
        InvalidFormatError: if any of the checks above fails.
    """
    colNames = line[3:].lower().split('\t')

    # NOTE: spaces inside column names are not rejected here; only tabs
    # separate columns.

    # Report the first column (in line order) that occurs more than once.
    for colName in colNames:
        if colNames.count(colName) > 1:
            raise InvalidFormatError(
                'Column "%s" appears multiple times in the ' % colName +
                'column specification line.')

    # str.split always yields at least one element, so index 0 is safe.
    if colNames[0] == '':
        raise InvalidFormatError(
            'Column specification line requires at least one '
            'column (the "uri" column), but none is specified.')

    if colNames[0] != URI_COL:
        raise InvalidFormatError('The first column must be "%s", not "%s".'
                                 % (URI_COL, colNames[0]))

    if any(colName.strip() == '' for colName in colNames):
        raise InvalidFormatError('Empty column names are not allowed.')

    # Index (within OPTIONAL_STD_COL_NAMES) of the last optional standard
    # column seen so far; -1 means none seen yet.
    curOptStdColIdx = -1
    nonStdColsFound = []

    for colName in colNames[1:]:
        if colName in OPTIONAL_STD_COL_NAMES:
            nextOptStdColIdx = OPTIONAL_STD_COL_NAMES.index(colName)

            if nonStdColsFound:
                raise InvalidFormatError(
                    'Non-standard columns "%s" ' % ', '.join(nonStdColsFound) +
                    'encountered before standard column "%s".' % colName)
            elif nextOptStdColIdx <= curOptStdColIdx:
                raise InvalidFormatError(
                    'Standard columns are not in the correct order: '
                    '%s.' % ', '.join('"%s"' % col for col in OPTIONAL_STD_COL_NAMES))

            curOptStdColIdx = nextOptStdColIdx
        else:
            if urlDecodePhrase(colName) != colName:
                raise InvalidFormatError(
                    'Column names in GSuite do not support URL escaping. '
                    'Offending column name: "{}"'.format(colName))

            nonStdColsFound.append(colName)

    return colNames
def _parseGenomeLine(line):
    """Extract the genome name from a ``genome=<value>`` line.

    The first four characters of ``line`` are dropped (presumably the line
    marker -- confirm against the caller) and the remainder is split on
    ``'='``. Both sides are stripped of surrounding whitespace; the value is
    passed through ``urlDecodePhrase`` and the key is compared
    case-insensitively against ``'genome'``.

    Returns:
        The URL-decoded genome value.

    Raises:
        InvalidFormatError: if the line does not contain exactly one ``'='``
            or if the key is not ``genome``.
    """
    content = line[4:]
    parts = content.split('=')
    if len(parts) != 2:
        raise InvalidFormatError('Genome line not understood: ' + repr(content))

    rawKey, rawGenome = parts
    # The value is decoded before the key is validated.
    genome = urlDecodePhrase(rawGenome.strip())

    key = rawKey.strip().lower()
    if key != 'genome':
        raise InvalidFormatError('Key in genome line is not "genome": ' + key)

    return genome
def setCustomHeader(self, headerName, headerVal):
    """Store a custom header value under the lower-cased header name.

    ``headerVal`` is passed through ``urlDecodePhrase`` before being stored
    in ``self._customHeaders``; an existing entry with the same lower-cased
    name is overwritten.
    """
    normalizedName = headerName.lower()
    decodedVal = urlDecodePhrase(headerVal)
    self._customHeaders[normalizedName] = decodedVal