예제 #1
0
def _parseHeaderLine(line):
    """Parse a single header line into a ``(key, value)`` pair.

    The first two characters of ``line`` are discarded (NOTE(review):
    presumably the header prefix -- confirm against the caller). The remainder
    must contain exactly one ':' separating the key from the value. The key is
    lower-cased and the value is stripped of surrounding whitespace before any
    further handling:

    * ``GENOME_HEADER``: the value is passed through ``urlDecodePhrase``.
    * Key not in ``HEADER_VAR_DICT``: the stripped value is returned as is,
      provided the key does not end with a space and is left unchanged by
      ``urlDecodePhrase``.
    * Key in ``HEADER_VAR_DICT``: the value is lower-cased and must be one of
      ``HEADER_VAR_DICT[key].allowed``. For ``FILE_TYPE_HEADER`` the value
      ``TEXT`` is returned as ``PRIMARY`` and ``BINARY`` as ``PREPROCESSED``.

    Returns:
        Tuple ``(key, value)``.

    Raises:
        InvalidFormatError: if the line does not split into exactly two parts
            on ':', if a key outside ``HEADER_VAR_DICT`` ends with a space or
            is changed by URL decoding, or if the value is not allowed for a
            key in ``HEADER_VAR_DICT``.
    """
    content = line[2:]
    parts = content.split(':')

    if len(parts) != 2:
        raise InvalidFormatError('Header line not understood: ' + repr(content))

    key = parts[0].lower()
    val = parts[1].strip()

    if key == GENOME_HEADER:
        return key, urlDecodePhrase(val)

    if key not in HEADER_VAR_DICT:
        # Keys outside HEADER_VAR_DICT are not rejected; only the form of the
        # key itself is validated.
        if key.endswith(' '):
            raise InvalidFormatError('Header variable "%s" must not end with space.' % key)

        if urlDecodePhrase(key) != key:
            raise InvalidFormatError('Custom header variable names in GSuite do not support URL '
                                     'escaping. Offending header variable: "{}"'.format(key))
        return key, val

    # Values of keys in HEADER_VAR_DICT are compared case-insensitively.
    val = val.lower()

    if val not in HEADER_VAR_DICT[key].allowed:
        raise InvalidFormatError('Value "%s" is not allowed for header "%s". Allowed values: %s' %
                                 (val, key, ', '.join(HEADER_VAR_DICT[key].allowed)))

    if key == FILE_TYPE_HEADER:
        # TEXT and BINARY are returned as PRIMARY and PREPROCESSED respectively.
        val = PRIMARY if val == TEXT else (PREPROCESSED if val == BINARY else val)

    return key, val
def _parseColumnSpecLine(line):
    """Parse the column specification line and return its column names.

    The first three characters of ``line`` are discarded (NOTE(review):
    presumably the line prefix -- confirm against the caller). The remainder
    is lower-cased and split on tab characters. The resulting names are then
    validated, in this order:

    1. No name may appear more than once.
    2. The first name must not be empty.
    3. The first name must equal ``URI_COL``.
    4. No name may be empty or consist of whitespace only.
    5. Among the remaining names, those listed in ``OPTIONAL_STD_COL_NAMES``
       must appear in the same relative order as in that list, and before
       any other name. Names not in that list must be unchanged by
       ``urlDecodePhrase``.

    Returns:
        List of lower-cased column names, in the order they appear.

    Raises:
        InvalidFormatError: if any of the checks above fails.
    """
    colNames = line[3:].lower().split('\t')

    # NOTE: column names containing spaces are accepted; the check that
    # rejected them has been disabled.

    # Cheap uniqueness test first; only on failure look up which name to
    # report (the first name in the line that occurs more than once).
    if len(set(colNames)) != len(colNames):
        duplicate = next(col for col in colNames if colNames.count(col) > 1)
        raise InvalidFormatError(
            'Column "%s" appears multiple times in the ' % duplicate +
            'column specification line.')

    # str.split always yields at least one element, so index 0 is safe.
    firstCol = colNames[0]

    if firstCol == '':
        raise InvalidFormatError(
            'Column specification line requires at least one '
            'column (the "uri" column), but none is specified.')

    if firstCol != URI_COL:
        raise InvalidFormatError('The first column must be "%s", not "%s".' %
                                 (URI_COL, firstCol))

    if any(colName.strip() == '' for colName in colNames):
        raise InvalidFormatError('Empty column names are not allowed.')

    curOptStdColIdx = -1  # position in OPTIONAL_STD_COL_NAMES of the last standard column seen
    nonStdColsFound = []
    for colName in colNames[1:]:
        if colName in OPTIONAL_STD_COL_NAMES:
            nextOptStdColIdx = OPTIONAL_STD_COL_NAMES.index(colName)

            if nonStdColsFound:
                raise InvalidFormatError(
                    'Non-standard columns "%s" ' % ', '.join(nonStdColsFound) +
                    'encountered before standard column "%s".' % colName)
            elif nextOptStdColIdx <= curOptStdColIdx:
                raise InvalidFormatError(
                    'Standard columns are not in the correct order: '
                    '%s.' % ', '.join('"%s"' % col
                                      for col in OPTIONAL_STD_COL_NAMES))

            curOptStdColIdx = nextOptStdColIdx
        else:
            if urlDecodePhrase(colName) != colName:
                raise InvalidFormatError(
                    'Column names in GSuite do not support URL escaping. '
                    'Offending column name: "{}"'.format(colName))
            nonStdColsFound.append(colName)

    return colNames
예제 #3
0
def _parseGenomeLine(line):
    """Extract the genome from a ``genome=<value>`` line.

    The first four characters of ``line`` are discarded (NOTE(review):
    presumably the line prefix -- confirm against the caller). The remainder
    must contain exactly one '='. Both sides are stripped of surrounding
    whitespace; the right-hand side is passed through ``urlDecodePhrase`` and
    the left-hand side, lower-cased, must be ``'genome'``.

    Returns:
        The URL-decoded genome value.

    Raises:
        InvalidFormatError: if the line does not split into exactly two parts
            on '=', or if the key is not ``'genome'``.
    """
    body = line[4:]
    fields = body.split('=')

    if len(fields) != 2:
        raise InvalidFormatError('Genome line not understood: ' + repr(body))

    rawKey, rawGenome = fields

    # The value is decoded before the key is checked.
    genome = urlDecodePhrase(rawGenome.strip())
    key = rawKey.strip().lower()

    if key != 'genome':
        raise InvalidFormatError('Key in genome line is not "genome": ' + key)

    return genome
예제 #4
0
 def setCustomHeader(self, headerName, headerVal):
     """Store a custom header value under the lower-cased header name.

     ``headerVal`` is passed through ``urlDecodePhrase`` before being stored
     in ``self._customHeaders``; an existing entry for the same lower-cased
     name is overwritten.
     """
     normalizedName = headerName.lower()
     decodedVal = urlDecodePhrase(headerVal)
     self._customHeaders[normalizedName] = decodedVal