def parse_comment_string(comment, values=None):
    """Split a leading ``@@key=value ...@@`` metadata block off a comment.

    A comment that starts with ``@@`` is expected to look like
    ``@@key1=value1 key2=value2@@remaining text``. The text between the
    first two ``@@`` markers is split on whitespace into ``key=value``
    items; everything after the second marker is returned as the comment.

    Args:
        comment: The raw comment string.
        values: Optional mapping of allowed metadata keys to validator
            callables. When it is truthy, a key must be present in the
            mapping and ``values[key](value)`` must return a truthy
            result. When it is ``None`` or empty, keys and values are
            checked against built-in character-class patterns instead.

    Returns:
        A ``(comment, metadata, errors)`` tuple. ``metadata`` is ``None``
        when no metadata block was parsed, otherwise a dict mapping each
        key to the list of its values in order of appearance (items are
        recorded even when they fail validation). ``errors`` is a list of
        human-readable messages; it is empty when nothing was wrong.
    """
    metadata = None
    errors = []
    if not comment.startswith('@@'):
        return comment, metadata, errors

    # Split at most twice so a later "@@" inside the free text stays part
    # of the returned comment instead of breaking the unpacking.
    parts = comment.split('@@', 2)
    if len(parts) < 3:
        # Opening marker without a closing one: report it and leave the
        # comment untouched rather than raising ValueError.
        errors.append('Unterminated metadata block')
        return comment, metadata, errors
    metastring, comment = parts[1], parts[2]

    re_key = re.compile(r'^[a-zA-Z0-9_\-]+$')
    re_value = re.compile(r'^[^\x00-\x20\x7F-\xFF"]+$')

    metadata = {}
    for item in metastring.split():
        # partition() splits on the first "=" only, so values may
        # themselves contain "=".
        key, separator, value = item.partition('=')
        if not separator:
            errors.append('Invalid metadata item "{0}"'.format(item))
            continue

        if values:
            if key not in values:
                errors.append('Invalid metadata key "{0}"'.format(key))
            elif not values[key](value):
                errors.append('Invalid metadata value "{0}"'.format(value))
        else:
            if not re_key.match(key):
                errors.append('Invalid metadata key "{0}"'.format(key))
            if not re_value.match(value):
                errors.append('Invalid metadata value "{0}"'.format(value))

        metadata.setdefault(key, []).append(value)

    return comment, metadata, errors
def parse_comment_string(comment, values=None):
    """Split a leading ``@@key=value ...@@`` metadata block off a comment.

    A comment that starts with ``@@`` is expected to look like
    ``@@key1=value1 key2=value2@@remaining text``. The text between the
    first two ``@@`` markers is split on whitespace into ``key=value``
    items; everything after the second marker is returned as the comment.

    Args:
        comment: The raw comment string.
        values: Optional mapping of allowed metadata keys to a container
            of the values permitted for that key. When it is truthy, a
            key must be present in the mapping and the value must satisfy
            ``value in values[key]``. When it is ``None`` or empty, keys
            and values are checked against built-in character-class
            patterns instead.

    Returns:
        A ``(comment, metadata, errors)`` tuple. ``metadata`` is ``None``
        when no metadata block was parsed, otherwise a dict mapping each
        key to the list of its values in order of appearance (items are
        recorded even when they fail validation). ``errors`` is a list of
        human-readable messages; it is empty when nothing was wrong.
    """
    metadata = None
    errors = []
    if not comment.startswith('@@'):
        return comment, metadata, errors

    # Split at most twice so a later "@@" inside the free text stays part
    # of the returned comment instead of breaking the unpacking.
    parts = comment.split('@@', 2)
    if len(parts) < 3:
        # Opening marker without a closing one: report it and leave the
        # comment untouched rather than raising ValueError.
        errors.append('Unterminated metadata block')
        return comment, metadata, errors
    metastring, comment = parts[1], parts[2]

    re_key = re.compile(r'^[a-zA-Z0-9_\-]+$')
    re_value = re.compile(r'^[^\x00-\x20\x7F-\xFF"]+$')

    metadata = {}
    for item in metastring.split():
        # partition() splits on the first "=" only, so values may
        # themselves contain "=".
        key, separator, value = item.partition('=')
        if not separator:
            errors.append('Invalid metadata item "{0}"'.format(item))
            continue

        if values:
            if key not in values:
                errors.append('Invalid metadata key "{0}"'.format(key))
            elif value not in values[key]:
                errors.append('Invalid metadata value "{0}"'.format(value))
        else:
            if not re_key.match(key):
                errors.append('Invalid metadata key "{0}"'.format(key))
            if not re_value.match(value):
                errors.append('Invalid metadata value "{0}"'.format(value))

        metadata.setdefault(key, []).append(value)

    return comment, metadata, errors
def iter_csv(csv_file, delimiter=",", quotechar='"'):
    """Yield ``(identifier, metadata)`` pairs for the rows of a CSV file.

    The first row is taken as the header and must contain an
    ``identifier`` column. Every following row is zipped with the header
    into a dict, passed through ``compile_metadata``, and yielded unless
    the compiled metadata has one key or fewer.

    Args:
        csv_file: Path of the CSV file to read.
        delimiter: Field delimiter handed to ``csv.reader``.
        quotechar: Quote character handed to ``csv.reader``.

    Yields:
        Tuples of ``(metadata['identifier'], metadata)``.

    Raises:
        SystemExit: With exit code 1, after writing a message to stderr,
            when the header row has no ``identifier`` column.
    """
    if sys.version_info[0] >= 3:
        # The "U" mode flag was removed in Python 3.11; the csv module
        # asks for newline='' so it can handle line endings itself.
        csv_handle = open(csv_file, newline='')
    else:
        csv_handle = open(csv_file, 'rU')

    with csv_handle as f:
        csv_reader = csv.reader(f, delimiter=delimiter, quotechar=quotechar)

        headers = next(csv_reader, None)
        if headers is None:
            # Empty file: nothing to validate and nothing to yield.
            return
        if 'identifier' not in headers:
            sys.stderr.write('ERROR! Missing "identifier" column.\n')
            sys.exit(1)

        for row in csv_reader:
            # zip() stops at the shorter sequence, so cells beyond the
            # header width (or headers beyond the row width) are dropped.
            metadata = compile_metadata(dict(zip(headers, row)))
            if len(metadata) > 1:
                yield metadata['identifier'], metadata