예제 #1
0
파일: storage.py 프로젝트: erinspace/scrapi
    def process_normalized(self, raw, normalized):
        """Write a normalized document to the on-disk archive as JSON.

        The output goes to ``archive/<source>/<docID>/normalized.json``;
        missing parent directories are created first.

        :param raw: the raw document, indexed by ``'source'`` and ``'docID'``
            to build the archive path
        :param normalized: the normalized document; its ``attributes`` are
            what gets serialized
        """
        # The template has exactly two placeholders. The former third
        # argument (``raw['filetype']``) was silently discarded by
        # ``str.format`` and is therefore no longer looked up.
        filename = 'archive/{}/{}/normalized.json'.format(raw['source'], raw['docID'])
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(filename, 'w') as f:
            # json_without_bytes is defined elsewhere; presumably it makes
            # byte strings JSON-safe -- confirm against its definition.
            f.write(json.dumps(json_without_bytes(normalized.attributes), indent=4))
예제 #2
0
    def __init__(self, attributes):
        """Validate ``attributes`` against ``self.schema`` and store them.

        Validation runs on the result of ``json_without_bytes(attributes)``,
        while the instance keeps the ``attributes`` object exactly as passed.

        :param attributes: the document attributes to validate and store
        """
        # Only the version handed to the schema validator goes through
        # json_without_bytes; the stored value is the caller's original.
        checkable = json_without_bytes(attributes)
        schema = self.schema
        checker = jsonschema.FormatChecker()
        jsonschema.validate(checkable, schema, format_checker=checker)

        self.attributes = attributes
예제 #3
0
파일: storage.py 프로젝트: hmoco/scrapi
    def write(self, source, doc_id, filename, content):
        """Dump ``content`` as JSON to ``archive/<source>/<doc_id>/<filename>.json``.

        Parent directories are created when they do not exist yet.

        :param source: first path component under ``archive/``
        :param doc_id: second path component under ``archive/``
        :param filename: file name, without the ``.json`` extension
        :param content: data to serialize after passing through
            ``json_without_bytes``
        """
        target = "archive/{}/{}/{}.json".format(source, doc_id, filename)
        parent = os.path.dirname(target)

        if not os.path.exists(parent):
            os.makedirs(parent)

        with open(target, "w") as handle:
            # Serialize only after the file is open, as before, so a failed
            # dump still leaves the same (truncated) file behind.
            payload = json.dumps(json_without_bytes(content), indent=4)
            handle.write(payload)
예제 #4
0
파일: storage.py 프로젝트: zamattiac/scrapi
    def write(self, source, doc_id, filename, content):
        """Store ``content`` as indented JSON inside the archive tree.

        The destination is ``archive/<source>/<doc_id>/<filename>.json``; the
        enclosing directory is created if it is missing.

        :param source: directory name directly below ``archive/``
        :param doc_id: directory name below ``source``
        :param filename: base name of the file (``.json`` is appended)
        :param content: object to serialize; it is passed through
            ``json_without_bytes`` before being dumped
        """
        destination = 'archive/{}/{}/{}.json'.format(source, doc_id, filename)
        folder = os.path.dirname(destination)

        if not os.path.exists(folder):
            os.makedirs(folder)

        with open(destination, 'w') as out:
            # Dumping happens inside the ``with`` block to keep the original
            # ordering of "open file" then "serialize".
            text = json.dumps(json_without_bytes(content), indent=4)
            out.write(text)
예제 #5
0
 def create(self, attributes):
     """Create and save a ``Document`` that holds only raw data.

     ``attributes`` is passed through ``json_without_bytes`` first; the
     result supplies ``source`` and ``docID`` and is stored whole as ``raw``.
     ``providerUpdatedDateTime`` and ``normalized`` are set to ``None``.

     :param attributes: mapping with at least ``'source'`` and ``'docID'``
     """
     cleaned = json_without_bytes(attributes)
     fields = dict(
         source=cleaned['source'],
         docID=cleaned['docID'],
         providerUpdatedDateTime=None,
         raw=cleaned,
         normalized=None,
     )
     document = Document.objects.create(**fields)
     # NOTE(review): if ``Document`` is a Django model, ``objects.create``
     # already persists the row, making this a second write -- confirm
     # before removing.
     document.save()
예제 #6
0
파일: storage.py 프로젝트: felliott/scrapi
    def process_normalized(self, raw, normalized):
        """Archive the normalized form of a document as indented JSON.

        Writes ``archive/<source>/<docID>/normalized.json``, creating the
        directory tree on demand.

        :param raw: the raw document; only ``raw['source']`` and
            ``raw['docID']`` are read, to build the path
        :param normalized: the normalized document whose ``attributes`` are
            serialized
        """
        # Only two values are formatted into the path. ``raw['filetype']``
        # used to be passed as a third argument, but ``str.format`` ignores
        # surplus positional arguments, so the lookup was dead weight.
        filename = 'archive/{}/{}/normalized.json'.format(
            raw['source'], raw['docID'])
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(filename, 'w') as f:
            # json_without_bytes lives elsewhere; presumably it strips or
            # decodes byte strings so the dump succeeds -- confirm.
            f.write(
                json.dumps(json_without_bytes(normalized.attributes),
                           indent=4))
예제 #7
0
파일: storage.py 프로젝트: erinspace/scrapi
    def process_raw(self, raw):
        """Write a raw document's attributes to the on-disk archive.

        The attributes are dumped as indented JSON to
        ``archive/<source>/<docID>/raw.<filetype>``; missing parent
        directories are created first.

        :param raw: the raw document; indexed by ``'source'``, ``'docID'`` and
            ``'filetype'`` for the path, and read through ``raw.attributes``
            for the content
        """
        filename = 'archive/{}/{}/raw.{}'.format(raw['source'], raw['docID'], raw['filetype'])
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)

        # Operate on a deep copy so the stringified versions below never
        # replace the values held by the caller's document.
        new_attrs = copy.deepcopy(raw.attributes)
        if new_attrs.get('versions'):
            # Build a concrete list. On Python 3 ``map`` yields a lazy
            # iterator, which is not a JSON-serializable sequence; on
            # Python 2 this produces the same list ``map`` did.
            new_attrs['versions'] = [str(version) for version in new_attrs['versions']]

        with open(filename, 'w') as f:
            f.write(json.dumps(json_without_bytes(new_attrs), indent=4))
예제 #8
0
파일: storage.py 프로젝트: felliott/scrapi
    def process_raw(self, raw):
        """Archive a raw document's attributes as indented JSON.

        Output path: ``archive/<source>/<docID>/raw.<filetype>``. The
        directory tree is created when it does not exist.

        :param raw: the raw document; ``raw['source']``, ``raw['docID']`` and
            ``raw['filetype']`` form the path, ``raw.attributes`` is the
            content that gets written
        """
        filename = 'archive/{}/{}/raw.{}'.format(raw['source'], raw['docID'],
                                                 raw['filetype'])
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)

        # A deep copy keeps the rewrite of 'versions' local to this method.
        new_attrs = copy.deepcopy(raw.attributes)
        if new_attrs.get('versions'):
            # Use a list comprehension instead of ``map``: Python 3's ``map``
            # returns an iterator rather than a list, and an iterator is not
            # a JSON-serializable value. Python 2 output is unchanged.
            new_attrs['versions'] = [
                str(version) for version in new_attrs['versions']]

        with open(filename, 'w') as f:
            f.write(json.dumps(json_without_bytes(new_attrs), indent=4))
예제 #9
0
    def __init__(self, attributes, validate=True, clean=False):
        ''' Build a document from its dictionary representation

            :param dict attributes: the dictionary representation of a document
            :param bool validate: when true, ``self.validate()`` is called
                once the attributes have been stored
            :param bool clean: when true, the attributes are passed through
                ``strip_empty``, keeping the schema's ``required`` fields
        '''
        # Deep-copy first so the caller's dictionary is never touched, then
        # run the copy through json_without_bytes (per the original note,
        # this is what allows validation on Python 3).
        snapshot = copy.deepcopy(attributes)
        self.attributes = json_without_bytes(snapshot)

        if clean:
            required_fields = self.schema.get('required', [])
            self.attributes = strip_empty(self.attributes, required=required_fields)

        if validate:
            self.validate()
예제 #10
0
    def __init__(self, attributes, validate=True, clean=False):
        ''' Set up a document from a dictionary of attributes

            :param dict attributes: the dictionary representation of a document
            :param bool validate: if true, ``self.validate()`` runs after the
                attributes are stored
            :param bool clean: if true, ``strip_empty`` is applied to the
                attributes, with the schema's ``required`` list passed along
        '''
        # The stored attributes are a deep copy of the input, passed through
        # json_without_bytes; the original comment notes this is what makes
        # validation work on Python 3.
        attrs_copy = copy.deepcopy(attributes)
        self.attributes = json_without_bytes(attrs_copy)

        if clean:
            must_keep = self.schema.get('required', [])
            self.attributes = strip_empty(self.attributes,
                                          required=must_keep)

        if validate:
            self.validate()