def process_normalized(self, raw, normalized):
    """Write a normalized document's attributes to the archive as JSON.

    The output goes to ``archive/<source>/<docID>/normalized.json``; the
    containing directory is created when it does not exist yet.

    :param raw: the raw document, indexed by ``'source'`` and ``'docID'``
    :param normalized: the normalized document whose ``attributes`` are
        serialized
    """
    # The template has only two placeholders, so the ``filetype`` value that
    # used to be passed as a third argument was silently discarded by
    # ``str.format``; it is no longer passed.
    filename = 'archive/{}/{}/normalized.json'.format(
        raw['source'], raw['docID'])

    directory = os.path.dirname(filename)
    try:
        os.makedirs(directory)
    except OSError:
        # Either the directory already existed or another process/thread
        # created it concurrently; both are fine. Anything else is a real
        # failure and is re-raised.
        if not os.path.isdir(directory):
            raise

    with open(filename, 'w') as f:
        f.write(json.dumps(json_without_bytes(normalized.attributes), indent=4))
def __init__(self, attributes):
    """Validate ``attributes`` against the class schema and store them.

    Validation runs on a copy produced by ``json_without_bytes`` so that the
    data handed to the JSON-schema validator is safe to check; the original
    ``attributes`` object (not the converted copy) is what gets stored.

    :param attributes: the document's attributes
    :raises jsonschema.ValidationError: if the attributes do not satisfy
        ``self.schema``
    """
    schema_safe_attributes = json_without_bytes(attributes)
    schema = self.schema
    checker = jsonschema.FormatChecker()
    jsonschema.validate(schema_safe_attributes, schema, format_checker=checker)

    self.attributes = attributes
def write(self, source, doc_id, filename, content):
    """Serialize ``content`` as indented JSON into the archive tree.

    The file is written to ``archive/<source>/<doc_id>/<filename>.json``;
    missing parent directories are created first.

    :param source: first path component under ``archive/``
    :param doc_id: second path component under ``archive/``
    :param filename: base name of the file, without the ``.json`` suffix
    :param content: data to serialize; passed through ``json_without_bytes``
        before ``json.dumps``
    """
    filepath = "archive/{}/{}/{}.json".format(source, doc_id, filename)

    directory = os.path.dirname(filepath)
    try:
        os.makedirs(directory)
    except OSError:
        # A separate exists() check followed by makedirs() can fail when two
        # writers create the same directory at once. Attempt the creation
        # and only re-raise if the directory is genuinely unavailable.
        if not os.path.isdir(directory):
            raise

    with open(filepath, "w") as f:
        f.write(json.dumps(json_without_bytes(content), indent=4))
def write(self, source, doc_id, filename, content):
    """Dump ``content`` to ``archive/<source>/<doc_id>/<filename>.json``.

    Parent directories are created on demand, and the payload is converted
    with ``json_without_bytes`` before being rendered with a 4-space indent.

    :param source: first path component under ``archive/``
    :param doc_id: second path component under ``archive/``
    :param filename: base name of the output file (``.json`` is appended)
    :param content: the data to serialize
    """
    filepath = 'archive/{}/{}/{}.json'.format(source, doc_id, filename)
    target_dir = os.path.dirname(filepath)

    # Create-then-handle instead of check-then-create: the directory may
    # appear between an exists() test and the makedirs() call when several
    # writers run concurrently.
    try:
        os.makedirs(target_dir)
    except OSError:
        if not os.path.isdir(target_dir):
            raise

    serialized = json.dumps(json_without_bytes(content), indent=4)
    with open(filepath, 'w') as f:
        f.write(serialized)
def create(self, attributes):
    """Create and save a ``Document`` row from a raw document's attributes.

    The attributes are first converted with ``json_without_bytes``; the
    converted mapping supplies ``source`` and ``docID`` and is also stored
    whole in the ``raw`` field. ``providerUpdatedDateTime`` and
    ``normalized`` start out as ``None``.

    :param attributes: mapping with at least ``'source'`` and ``'docID'``
    """
    safe_attributes = json_without_bytes(attributes)

    create_document = Document.objects.create
    field_values = {
        'source': safe_attributes['source'],
        'docID': safe_attributes['docID'],
        'providerUpdatedDateTime': None,
        'raw': safe_attributes,
        'normalized': None,
    }
    document = create_document(**field_values)

    # NOTE(review): if ``Document`` is a Django model, ``objects.create()``
    # already persists the row, so this second save looks redundant --
    # confirm before removing, since it also re-fires any save signals.
    document.save()
def process_normalized(self, raw, normalized):
    """Archive the normalized form of a document as indented JSON.

    Writes ``normalized.attributes`` (converted by ``json_without_bytes``)
    to ``archive/<source>/<docID>/normalized.json``, creating the directory
    when necessary.

    :param raw: the raw document; ``raw['source']`` and ``raw['docID']``
        determine the output directory
    :param normalized: the normalized document to serialize
    """
    # Only two values are interpolated: the path template has two
    # placeholders, and the extra ``raw['filetype']`` argument previously
    # passed to ``format`` had no effect on the result.
    filename = 'archive/{}/{}/normalized.json'.format(
        raw['source'], raw['docID'])
    archive_dir = os.path.dirname(filename)

    try:
        os.makedirs(archive_dir)
    except OSError:
        # Tolerate the directory already being there (including the case
        # where a concurrent writer created it a moment ago); propagate any
        # other failure.
        if not os.path.isdir(archive_dir):
            raise

    payload = json.dumps(json_without_bytes(normalized.attributes), indent=4)
    with open(filename, 'w') as f:
        f.write(payload)
def process_raw(self, raw):
    """Archive a raw document's attributes as indented JSON.

    The file is written to ``archive/<source>/<docID>/raw.<filetype>``. A
    deep copy of ``raw.attributes`` is serialized so the document itself is
    never modified; a non-empty ``versions`` entry is converted to a list of
    strings before serialization.

    :param raw: the raw document; indexed by ``'source'``, ``'docID'`` and
        ``'filetype'`` and exposing an ``attributes`` mapping
    """
    filename = 'archive/{}/{}/raw.{}'.format(
        raw['source'], raw['docID'], raw['filetype'])

    directory = os.path.dirname(filename)
    try:
        os.makedirs(directory)
    except OSError:
        # Avoids the exists()/makedirs() race: an already-present directory
        # is fine, any other error is re-raised.
        if not os.path.isdir(directory):
            raise

    new_attrs = copy.deepcopy(raw.attributes)
    versions = new_attrs.get('versions')
    if versions:
        # Build a real list: on Python 3 ``map`` returns a lazy iterator,
        # which ``json.dumps`` cannot serialize. On Python 2 the result is
        # identical to the former ``map(str, ...)``.
        new_attrs['versions'] = [str(version) for version in versions]

    with open(filename, 'w') as f:
        f.write(json.dumps(json_without_bytes(new_attrs), indent=4))
def __init__(self, attributes, validate=True, clean=False):
    """Build a document from its dictionary representation.

    The incoming attributes are deep-copied and passed through
    ``json_without_bytes`` before being stored, so the stored version is
    the one that is safe to check against the JSON schema (this is what
    allows validation under Python 3).

    :param dict attributes: the dictionary representation of a document
    :param bool validate: if true, ``self.validate()`` is called before
        construction finishes
    :param bool clean: if true, the attributes are run through
        ``strip_empty`` so that optional fields that are null are removed;
        fields listed under ``required`` in the schema are passed along as
        required
    """
    attributes_copy = copy.deepcopy(attributes)
    self.attributes = json_without_bytes(attributes_copy)

    if clean:
        self.attributes = strip_empty(
            self.attributes,
            required=self.schema.get('required', []),
        )

    if validate:
        self.validate()
def __init__(self, attributes, validate=True, clean=False):
    """Initialize a document.

    A deep copy of ``attributes`` is converted with ``json_without_bytes``
    and kept on the instance; working on that converted copy is what makes
    schema validation possible on Python 3.

    :param dict attributes: the dictionary representation of a document
    :param bool validate: when true, the new object is validated via
        ``self.validate()``
    :param bool clean: when true, optional fields that are null are
        deleted by ``strip_empty``
    """
    self.attributes = json_without_bytes(copy.deepcopy(attributes))

    if clean:
        # Fields the schema marks as required are handed to strip_empty
        # separately from the optional ones.
        required_fields = self.schema.get('required', [])
        self.attributes = strip_empty(self.attributes, required=required_fields)

    if validate:
        self.validate()