예제 #1
0
파일: alert.py 프로젝트: pudo/aleph
 def to_dict(self):
     """Serialize this object to a plain dict.

     Starts from the result of ``to_dict_dates()`` and adds the object's own
     fields; ``id`` and ``role_id`` are passed through ``stringify``.
     """
     payload = self.to_dict_dates()
     payload['id'] = stringify(self.id)
     payload['query'] = self.query
     payload['normalized'] = self.normalized
     payload['role_id'] = stringify(self.role_id)
     payload['notified_at'] = self.notified_at
     return payload
예제 #2
0
파일: permission.py 프로젝트: pudo/aleph
 def to_dict(self):
     """Serialize this object to a plain dict.

     The identifier fields are passed through ``stringify``; the ``read``
     and ``write`` attributes are copied as they are.
     """
     payload = self.to_dict_dates()
     payload['id'] = stringify(self.id)
     payload['role_id'] = stringify(self.role_id)
     payload['collection_id'] = stringify(self.collection_id)
     payload['read'] = self.read
     payload['write'] = self.write
     return payload
예제 #3
0
파일: parser.py 프로젝트: pudo/aleph
    def __init__(self, args, authz, limit=None):
        """Set up the parser from request arguments.

        ``args`` is wrapped in an ``OrderedMultiDict`` unless it already is
        a ``MultiDict``. When ``limit`` is not given, it is read from the
        ``limit`` argument (default 20) and clamped between 0 and
        ``MAX_PAGE``.
        """
        if isinstance(args, MultiDict):
            self.args = args
        else:
            self.args = OrderedMultiDict(args)
        self.authz = authz

        # Negative offsets are treated as zero.
        requested_offset = self.getint('offset', 0)
        self.offset = max(0, requested_offset)

        if limit is None:
            requested_limit = self.getint('limit', 20)
            limit = min(MAX_PAGE, max(0, requested_limit))
        self.limit = limit

        query_text = self.get('q')
        self.text = stringify(query_text)
        prefix_text = self.get('prefix')
        self.prefix = stringify(prefix_text)

        # Disable or enable query caching
        self.cache = self.getbool('cache', settings.CACHE)
예제 #4
0
파일: parser.py 프로젝트: pudo/aleph
 def items(self):
     """Yield ``(key, value)`` pairs for every argument except ``offset``.

     Values are passed through ``stringify``; pairs whose value comes back
     as ``None`` are skipped.
     """
     for name, raw in self.args.items(multi=True):
         if name != 'offset':
             text = stringify(raw, encoding='utf-8')
             if text is not None:
                 yield name, text
예제 #5
0
파일: parser.py 프로젝트: pudo/aleph
 def getlist(self, name, default=None):
     """Return all values given for ``name`` after passing them through
     ``stringify``, dropping those that come back as ``None``.

     When nothing remains, ``default`` is returned, or an empty list if
     ``default`` is falsy.
     """
     cleaned = (stringify(raw, encoding='utf-8')
                for raw in self.args.getlist(name))
     found = [text for text in cleaned if text is not None]
     if found:
         return found
     return default or []
예제 #6
0
파일: ingest_api.py 프로젝트: pudo/aleph
def ingest_upload(collection_id):
    """Handle an upload of files into the given collection.

    Checks write access on the collection, stages the uploaded files in a
    temporary directory, looks up a document via ``Document.by_keys`` and
    hands it to ``ingest_document``. Responds with a JSON body containing
    the document ID and HTTP status 201.
    """
    require(request.authz.can(collection_id, request.authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    # Staging directory for the upload; always removed in ``finally`` below.
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        path = None
        content_hash = None
        # Every iteration overwrites ``path`` and ``content_hash``, so the
        # document below is built from the last file in the request.
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
        document = Document.by_keys(collection_id=collection_id,
                                    parent_id=parent_id,
                                    foreign_id=foreign_id,
                                    content_hash=content_hash)
        document.update(meta)
        document.schema = Document.SCHEMA
        # No file was uploaded, so the document is given the folder schema.
        if content_hash is None:
            document.schema = Document.SCHEMA_FOLDER
        ingest_document(document, path,
                        role_id=request.authz.id,
                        content_hash=content_hash)
    finally:
        shutil.rmtree(upload_dir)

    if document.collection.casefile:
        # Make sure collection counts are always accurate.
        update_document(document, sync=sync)
    return jsonify({
        'status': 'ok',
        'id': stringify(document.id)
    }, status=201)
예제 #7
0
파일: role.py 프로젝트: d1gl3r/aleph
 def update(self, data):
     """Apply the changes in ``data`` to this object.

     Keys missing from ``data`` keep their current value; the password is
     only changed when a truthy ``password`` is supplied.
     """
     self.name = data.get('name', self.name)
     self.is_muted = data.get('is_muted', self.is_muted)
     new_password = data.get('password')
     if new_password:
         self.set_password(new_password)
     locale = data.get('locale', self.locale)
     self.locale = stringify(locale)
     self.updated_at = datetime.utcnow()
예제 #8
0
파일: resolver.py 프로젝트: pudo/aleph
def get(stub, clazz, key):
    """Look up a previously loaded object in the stub's cache.

    Returns ``None`` when ``key`` stringifies to ``None``; otherwise returns
    whatever the cache holds for ``(clazz, key)``.
    """
    _instrument_stub(stub)
    cache_key = stringify(key)
    if cache_key is not None:
        return stub._rx_cache.get((clazz, cache_key))
    return None
예제 #9
0
파일: parser.py 프로젝트: kaue-cauin/aleph
 def getlist(self, name, default=None):
     """Return all truthy, stringified values given for ``name``.

     When nothing remains, ``default`` is returned, or an empty list if
     ``default`` is falsy.
     """
     cleaned = (stringify(raw, encoding='utf-8')
                for raw in self.args.getlist(name))
     kept = [text for text in cleaned if text]
     if kept:
         return kept
     return default or []
예제 #10
0
def ingest_upload(collection_id):
    """Handle an upload of files into the given collection.

    Loads the collection with ``request.authz.WRITE``, stages and archives
    the uploaded files, saves a ``Document`` and passes its proxy to
    ``ingest_entity``. Responds with a JSON body containing the document ID
    and HTTP status 201.
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    meta, foreign_id = _load_metadata()
    parent = _load_parent(collection, meta)
    # Staging directory for the upload; always removed in ``finally`` below.
    upload_dir = ensure_path(mkdtemp(prefix='aleph.upload.'))
    try:
        content_hash = None
        # Every iteration overwrites ``content_hash``, so the document below
        # refers to the last file in the request (or None without files).
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = upload_dir.joinpath(path)
            storage.save(str(path))
            content_hash = archive.archive_file(path)
        document = Document.save(collection=collection,
                                 parent=parent,
                                 foreign_id=foreign_id,
                                 content_hash=content_hash,
                                 meta=meta,
                                 uploader_id=request.authz.id)
        # The session is committed before the proxy is handed to ingest.
        db.session.commit()
        proxy = document.to_proxy()
        ingest_entity(collection, proxy)
    finally:
        shutil.rmtree(upload_dir)

    return jsonify({
        'status': 'ok',
        'id': stringify(document.id)
    }, status=201)
예제 #11
0
    def ingest(self, file_path, entity):
        """Read the message at ``file_path`` and populate ``entity`` as an
        ``Email``.

        Copies body, subject and identity fields from the message onto the
        entity and passes every attachment to ``ingest_attachment``.
        """
        entity.schema = model.get('Email')
        msg = Message(file_path)
        self.extract_olefileio_metadata(msg, entity)

        # This property information was sourced from
        # http://www.fileformat.info/format/outlookmsg/index.htm
        # on 2013-07-22.
        headers = msg.getField('007D')
        if headers is not None:
            # Header parsing is best-effort: any failure is logged and the
            # rest of the message is still processed.
            try:
                msg_headers = Parser().parsestr(headers, headersonly=True)
                self.extract_msg_headers(entity, msg_headers)
            except Exception:
                log.exception("Cannot parse Outlook-stored headers")

        entity.add('bodyText', msg.getField('1000'))
        entity.add('messageId', msg.getField('1035'))
        entity.add('subject', msg.getField('0037'))
        entity.add('threadTopic', msg.getField('0070'))

        # sender name and email
        sender = self.get_identity(msg.getField('0C1A'), msg.getField('0C1F'))
        self.apply_identities(entity, sender, 'emitters', 'sender')

        # received by
        # NOTE: the local is still called ``sender`` here, but it is applied
        # to the 'recipients' property.
        sender = self.get_identity(msg.getField('0040'), msg.getField('0076'))
        self.apply_identities(entity, sender, 'recipients')

        froms = self.get_identities(msg.getField('1046'))
        self.apply_identities(entity, froms, 'emitters', 'from')

        tos = self.get_identities(msg.getField('0E04'))
        self.apply_identities(entity, tos, 'recipients', 'to')

        ccs = self.get_identities(msg.getField('0E03'))
        self.apply_identities(entity, ccs, 'recipients', 'cc')

        bccs = self.get_identities(msg.getField('0E02'))
        self.apply_identities(entity, bccs, 'recipients', 'bcc')

        self.resolve_message_ids(entity)
        for attachment in msg.attachments:
            # Prefer the long file name and fall back to the short one.
            name = stringify(attachment.longFilename)
            name = name or stringify(attachment.shortFilename)
            self.ingest_attachment(entity, name, attachment.mimeType,
                                   attachment.data)
예제 #12
0
 def to_dict(self):
     """Serialize this object to a plain dict.

     ``category`` and ``frequency`` fall back to their class-level defaults
     when the stored value is not one of the known options. ``id`` and
     ``collection_id`` both carry the stringified ``self.id``.
     """
     data = self.to_dict_dates()

     category = self.DEFAULT_CATEGORY
     if self.category in self.CATEGORIES:
         category = self.category
     data['category'] = category

     frequency = self.DEFAULT_FREQUENCY
     if self.frequency in self.FREQUENCIES:
         frequency = self.frequency
     data['frequency'] = frequency

     data['kind'] = 'casefile' if self.casefile else 'source'
     data.update({
         'id': stringify(self.id),
         'collection_id': stringify(self.id),
         'foreign_id': self.foreign_id,
         'creator_id': stringify(self.creator_id),
         'team_id': self.team_id,
         'label': self.label,
         'summary': self.summary,
         'publisher': self.publisher,
         'publisher_url': self.publisher_url,
         'info_url': self.info_url,
         'data_url': self.data_url,
         'casefile': self.casefile,
         'secret': self.secret,
         'xref': self.xref,
         'restricted': self.restricted,
         'countries': registry.country.normalize_set(self.countries),
         'languages': registry.language.normalize_set(self.languages),
     })
     return data
예제 #13
0
파일: role.py 프로젝트: rmallof/aleph
 def update(self, data):
     """Apply the changes in ``data`` to this object.

     Keys missing from ``data`` keep their current value; the password is
     only changed when a truthy ``password`` is supplied. Ends by calling
     ``self.touch()``.
     """
     self.name = data.get("name", self.name)
     self.is_muted = data.get("is_muted", self.is_muted)
     self.is_tester = data.get("is_tester", self.is_tester)
     new_password = data.get("password")
     if new_password:
         self.set_password(new_password)
     locale = data.get("locale", self.locale)
     self.locale = stringify(locale)
     self.touch()
예제 #14
0
파일: olm.py 프로젝트: x0rzkov/aleph
    def ingest(self, file_path, entity):
        """Parse the XML file at ``file_path`` and populate ``entity`` as an
        ``Email``.

        Raises ``ProcessingException`` when the XML cannot be parsed or when
        the document does not contain exactly one ``email`` element.
        """
        entity.schema = model.get('Email')
        try:
            doc = self.parse_xml_path(file_path)
        except TypeError as te:
            raise ProcessingException("Cannot parse OPF XML file.") from te

        # NOTE(review): this rejects any count other than one, including
        # zero, although the message only mentions "more than one".
        if len(doc.findall('//email')) != 1:
            raise ProcessingException("More than one email in file.")

        email = doc.find('//email')
        props = email.getchildren()
        # Map child tag -> stringified text, skipping children without text.
        props = {c.tag: stringify(c.text) for c in props if c.text}
        # from pprint import pformat
        # log.info(pformat(props))

        entity.add('subject', props.pop('OPFMessageCopySubject', None))
        entity.add('threadTopic', props.pop('OPFMessageCopyThreadTopic', None))
        entity.add('summary', props.pop('OPFMessageCopyPreview', None))
        # message IDs are already parsed, no need to clean prior:
        entity.add('messageId', props.pop('OPFMessageCopyMessageID', None))
        entity.add('date', self.get_date(props, 'OPFMessageCopySentTime'))
        entity.add('modifiedAt', self.get_date(props, 'OPFMessageCopyModDate'))

        senders = self.get_contacts(email, 'OPFMessageCopySenderAddress')
        self.apply_identities(entity, senders, 'emitters', 'sender')

        froms = self.get_contacts(email, 'OPFMessageCopyFromAddresses')
        self.apply_identities(entity, froms, 'emitters', 'from')

        tos = self.get_contacts(email, 'OPFMessageCopyToAddresses')
        self.apply_identities(entity, tos, 'recipients', 'to')

        ccs = self.get_contacts(email, 'OPFMessageCopyCCAddresses')
        self.apply_identities(entity, ccs, 'recipients', 'cc')

        bccs = self.get_contacts(email, 'OPFMessageCopyBCCAddresses')
        self.apply_identities(entity, bccs, 'recipients', 'bcc')

        entity.add('bodyText', props.pop('OPFMessageCopyBody', None))
        html = props.pop('OPFMessageCopyHTMLBody', None)
        # The HTML body is only used when the flag property equals the
        # literal '1E0' and the body stringifies to something truthy.
        has_html = '1E0' == props.pop('OPFMessageGetHasHTML', None)
        if has_html and stringify(html):
            self.extract_html_content(entity, html, extract_metadata=False)

        self.resolve_message_ids(entity)
예제 #15
0
 def _field_values(self, el, name):
     """Return the sorted text values of the field called ``name`` under
     ``el``, leaving out values that stringify to ``None``."""
     query = './ns:field[@name="%s"]/ns:value/text()' % name
     matches = el.xpath(query, namespaces=self.NSMAP)
     cleaned = (stringify(match) for match in matches)
     return sorted(text for text in cleaned if text is not None)
예제 #16
0
 def match_regexp(self, value, q, strict=False):
     """Check that the stringified ``value`` matches the regular expression
     ``q`` at its start; otherwise report the failure via ``self.shout``."""
     text = stringify(value)
     pattern = re.compile(q)
     if text is not None and pattern.match(text):
         return
     self.shout("%r not matching the regexp %r", strict, text, q)
예제 #17
0
    def __init__(self, origin, query_uid=None, match_uid=None):
        """Store the stringified ``origin`` and set up a logger named after
        the project and the origin.

        Raises ``ValueError`` when ``origin`` stringifies to ``None``.
        """
        self.origin = stringify(origin)
        if self.origin is None:
            raise ValueError("Invalid origin")

        logger_name = '%s.%s' % (project.name, self.origin)
        self.log = logging.getLogger(logger_name)
        self.query_uid = query_uid
        self.match_uid = match_uid
예제 #18
0
def sanitize_text(text, encoding=DEFAULT_ENCODING):
    """Stringify ``text``, strip unsafe characters and round-trip the result
    through ``encoding``.

    Returns ``None`` when nothing is left after cleaning.
    """
    cleaned = remove_unsafe_chars(stringify(text, encoding_default=encoding))
    if cleaned is None:
        return None
    # Characters that cannot be encoded are replaced during encoding, so the
    # strict decode that follows operates on bytes produced by this codec.
    encoded = cleaned.encode(encoding, 'replace')
    return encoded.decode(encoding, 'strict')
예제 #19
0
def key_bytes(key: Any) -> bytes:
    """Turn ``key`` into bytes suitable for feeding to a hash.

    Bytes are returned unchanged; anything else is stringified and encoded
    as UTF-8, with ``None`` results becoming empty bytes.
    """
    if not isinstance(key, bytes):
        text = stringify(key)
        return b"" if text is None else text.encode("utf-8")
    return key
예제 #20
0
 def _serialize(self, obj):
     """Add the API-facing fields to ``obj`` in place and return it.

     Sets a string ``id``, a ``links`` dict with the object's own URL and a
     ``writeable`` flag; ``role_id`` is removed from ``obj``.
     """
     alert_id = obj.get('id')
     obj['id'] = str(alert_id)
     self_url = url_for('alerts_api.view', alert_id=alert_id)
     obj['links'] = {'self': self_url}
     owner_id = obj.pop('role_id', None)
     # Writeable only when the owning role is the requesting role.
     obj['writeable'] = owner_id == stringify(request.authz.id)
     # obj['role'] = self.resolve(Role, role_id, RoleSerializer)
     return obj
예제 #21
0
 def file_name(self):
     """Return the stringified, URL-unquoted ``filename`` option of the
     ``content-disposition`` header, or ``None`` without such a header."""
     disposition = self.headers.get('content-disposition')
     if disposition is None:
         return None
     _, options = cgi.parse_header(disposition)
     raw_name = options.get('filename') or ''
     return stringify(unquote(raw_name))
예제 #22
0
    def ingest(self, file_path, entity):
        """Parse the XML file at ``file_path`` and populate ``entity`` as an
        ``Email``.

        Raises ``ProcessingException`` when the XML cannot be parsed or when
        the document does not contain exactly one ``email`` element.
        """
        entity.schema = model.get("Email")
        try:
            doc = self.parse_xml_path(file_path)
        except TypeError as te:
            raise ProcessingException("Cannot parse OPF XML file.") from te

        # NOTE(review): this rejects any count other than one, including
        # zero, although the message only mentions "more than one".
        if len(doc.findall("//email")) != 1:
            raise ProcessingException("More than one email in file.")

        email = doc.find("//email")
        props = email.getchildren()
        # Map child tag -> stringified text, skipping children without text.
        props = {c.tag: stringify(c.text) for c in props if c.text}
        # from pprint import pformat
        # log.info(pformat(props))

        entity.add("subject", props.pop("OPFMessageCopySubject", None))
        entity.add("threadTopic", props.pop("OPFMessageCopyThreadTopic", None))
        entity.add("summary", props.pop("OPFMessageCopyPreview", None))
        # message IDs are already parsed, no need to clean prior:
        entity.add("messageId", props.pop("OPFMessageCopyMessageID", None))
        entity.add("date", self.get_date(props, "OPFMessageCopySentTime"))
        entity.add("modifiedAt", self.get_date(props, "OPFMessageCopyModDate"))

        senders = self.get_contacts(email, "OPFMessageCopySenderAddress")
        self.apply_identities(entity, senders, "emitters", "sender")

        froms = self.get_contacts(email, "OPFMessageCopyFromAddresses")
        self.apply_identities(entity, froms, "emitters", "from")

        tos = self.get_contacts(email, "OPFMessageCopyToAddresses")
        self.apply_identities(entity, tos, "recipients", "to")

        ccs = self.get_contacts(email, "OPFMessageCopyCCAddresses")
        self.apply_identities(entity, ccs, "recipients", "cc")

        bccs = self.get_contacts(email, "OPFMessageCopyBCCAddresses")
        self.apply_identities(entity, bccs, "recipients", "bcc")

        entity.add("bodyText", props.pop("OPFMessageCopyBody", None))
        html = props.pop("OPFMessageCopyHTMLBody", None)
        # The HTML body is only used when the flag property equals the
        # literal "1E0" and the body stringifies to something truthy.
        has_html = "1E0" == props.pop("OPFMessageGetHasHTML", None)
        if has_html and stringify(html):
            self.extract_html_content(entity, html, extract_metadata=False)

        self.resolve_message_ids(entity)
예제 #23
0
    def update(self, data, authz):
        """Apply the changes in ``data`` to this object and flush the session.

        Keys missing from ``data`` keep their current value. ``category``,
        ``casefile`` and the creator are only changed when ``authz.is_admin``
        is true.
        """
        self.label = data.get('label', self.label)
        self.summary = data.get('summary', self.summary)
        self.publisher = data.get('publisher', self.publisher)
        # URL fields are stringified only when they are not None.
        self.publisher_url = data.get('publisher_url', self.publisher_url)
        if self.publisher_url is not None:
            self.publisher_url = stringify(self.publisher_url)
        self.info_url = data.get('info_url', self.info_url)
        if self.info_url is not None:
            self.info_url = stringify(self.info_url)
        self.data_url = data.get('data_url', self.data_url)
        if self.data_url is not None:
            self.data_url = stringify(self.data_url)
        # Every country and language value is passed through the registry's
        # ``clean``.
        self.countries = ensure_list(data.get('countries', self.countries))
        self.countries = [
            registry.country.clean(val) for val in self.countries
        ]  # noqa
        self.languages = ensure_list(data.get('languages', self.languages))
        self.languages = [
            registry.language.clean(val) for val in self.languages
        ]  # noqa
        self.frequency = data.get('frequency', self.frequency)
        self.restricted = data.get('restricted', self.restricted)
        self.xref = data.get('xref', self.xref)

        # Some fields are editable only by admins in order to have
        # a strict separation between source evidence and case
        # material.
        if authz.is_admin:
            self.category = data.get('category', self.category)
            self.casefile = as_bool(data.get('casefile'),
                                    default=self.casefile)
            # The creator may be given as ``creator_id`` or as a nested
            # ``creator`` dict with an ``id`` key; ``creator_id`` wins.
            creator = ensure_dict(data.get('creator'))
            creator_id = data.get('creator_id', creator.get('id'))
            creator = Role.by_id(creator_id)
            if creator is not None:
                self.creator = creator

        # Casefiles always carry the 'casefile' category.
        if self.casefile:
            self.category = 'casefile'

        self.touch()
        db.session.flush()
        # NOTE(review): the two ``True`` arguments presumably grant read and
        # write access to the creator - confirm against Permission.grant.
        if self.creator is not None:
            Permission.grant(self, self.creator, True, True)
예제 #24
0
 def records(self):
     """Yield one dict per CSV row that passes ``check_filters``.

     Each dict maps every ref in ``self.query.refs`` to the stringified
     value of that column in the row.
     """
     for url in self.urls:
         for row in self.read_csv(url):
             record = {ref: stringify(row.get(ref))
                       for ref in self.query.refs}
             if self.check_filters(record):
                 yield record
예제 #25
0
def queue(stub, clazz, key, schema=None):
    """Register ``(clazz, key, schema)`` on the stub's retrieval queue.

    Keys that stringify to ``None`` are ignored. The queue is updated with
    ``add``, so repeating the same signature does not add another entry.
    """
    _instrument_stub(stub)
    queue_key = stringify(key)
    if queue_key is not None:
        stub._rx_queue.add((clazz, queue_key, schema))
예제 #26
0
 def to_dict(self):
     """Serialize this object to a plain dict.

     ``id`` and ``creator_id`` are stringified, and ``status`` is looked up
     in ``Status.LABEL``; the remaining attributes are copied unchanged.
     """
     payload = self.to_dict_dates()
     payload["id"] = stringify(self.id)
     payload["label"] = self.label
     payload["operation"] = self.operation
     payload["creator_id"] = stringify(self.creator_id)
     payload["collection_id"] = self.collection_id
     payload["expires_at"] = self.expires_at
     payload["deleted"] = self.deleted
     payload["status"] = Status.LABEL.get(self.status)
     payload["content_hash"] = self.content_hash
     payload["file_size"] = self.file_size
     payload["file_name"] = self.file_name
     payload["mime_type"] = self.mime_type
     payload["meta"] = self.meta
     return payload
예제 #27
0
def normalize_mime_type(mime_type):
    """Return the lower-cased, stringified MIME type.

    Returns ``None`` when the value stringifies to ``None`` or is the
    generic ``application/octet-stream``.
    """
    cleaned = stringify(mime_type)
    if cleaned is None:
        return None
    cleaned = cleaned.lower()
    return None if cleaned == 'application/octet-stream' else cleaned
예제 #28
0
 def is_integer(self, value, strict=False):
     """Accept ``value`` when it is a number or a numeric string.

     Any ``numbers.Number`` instance passes, as does any value whose
     stringified form satisfies ``str.isnumeric``; everything else is
     reported via ``self.shout``.
     """
     if value is not None and isinstance(value, numbers.Number):
         return
     text = stringify(value)
     if text is None or not text.isnumeric():
         self.shout("value %r is not an integer", strict, text)
예제 #29
0
 def validate(self, email, **kwargs):
     """Check whether ``email`` is a valid address.

     Returns ``None`` when the value stringifies to ``None``, ``False`` when
     it does not match ``EMAIL_REGEX``, and otherwise the result of
     validating the part after the last ``@`` with ``self.domains``.
     """
     address = stringify(email)
     if address is None:
         return None
     if not self.EMAIL_REGEX.match(address):
         return False
     _, domain = address.rsplit('@', 1)
     return self.domains.validate(domain, **kwargs)
예제 #30
0
def parse(context, data):
    """Group the rows of the fetched CSV file by ``Group ID`` and emit one
    item per group.

    The file is read as ISO-8859-1 and its first line is skipped before the
    CSV header. Every remaining cell is stringified, with ``None`` results
    replaced by an empty string. Each emitted item has the shape
    ``{'group': <int>, 'rows': [<dict>, ...]}``.
    """
    groups = {}
    res = context.http.rehash(data)

    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires for file objects.
    with open(res.file_path, 'r', encoding='iso-8859-1',
              newline='') as csvfile:
        # ignore first line; the default keeps an empty file from raising
        # StopIteration here.
        next(csvfile, None)
        for row in csv.DictReader(csvfile):
            # Group IDs may be written as floats (e.g. "12.0").
            group = int(float(row.pop('Group ID')))
            cleaned = {}
            for column, cell in row.items():
                # Stringify each cell once instead of twice.
                text = stringify(cell)
                cleaned[column] = '' if text is None else text
            groups.setdefault(group, []).append(cleaned)

    for group, rows in groups.items():
        context.emit(data={'group': group, 'rows': rows})
예제 #31
0
 def _generate_stream():
     """Yield each row of ``iterable`` as one line of Excel-dialect CSV,
     writing cells that stringify to a falsy value as empty strings."""
     for row in iterable:
         cells = [stringify(cell) or '' for cell in row]
         line = io.StringIO()
         csv.writer(line, dialect='excel', delimiter=',').writerow(cells)
         yield line.getvalue()
예제 #32
0
 def store(self, key, value):
     """Upsert ``value`` as JSON under the stringified ``key``, stamped with
     the current UTC time. Keys that stringify to ``None`` are ignored."""
     key = stringify(key)
     if key is None:
         return
     record = {
         'key': key,
         'value': json.dumps(value),
         'timestamp': datetime.utcnow()
     }
     self.table.upsert(record, ['key'])
예제 #33
0
 def clean(self, text, **kwargs):
     """Return the canonical string form of the IP address in ``text``.

     Returns ``None`` when ``text`` stringifies to ``None`` or cannot be
     parsed by ``ip_address``.
     """
     candidate = stringify(text)
     if candidate is None:
         return None
     try:
         parsed = ip_address(candidate)
     except ValueError:
         return None
     return str(parsed)
예제 #34
0
파일: resolver.py 프로젝트: pudo/aleph
def queue(stub, clazz, key, schema=None):
    """Register ``(clazz, key, schema)`` on the stub's retrieval queue.

    Keys that stringify to ``None`` are ignored. The queue is updated with
    ``add``, so repeating the same signature does not add another entry.
    """
    _instrument_stub(stub)
    queue_key = stringify(key)
    if queue_key is not None:
        stub._rx_queue.add((clazz, queue_key, schema))
예제 #35
0
    def __init__(
        self,
        model: "Model",
        query: "QueryMapping",
        name: str,
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
    ) -> None:
        """Build the mapping called ``name`` from its ``data`` description.

        Raises ``InvalidMapping`` when the mapping has neither keys nor an
        ``id_column`` (or has both), when the schema is missing or unknown,
        or when a property name does not exist on the schema.
        """
        self.model = model
        self.name = name

        # The hash seed combines the key prefix with the ``key_literal``.
        seed = sha1(key_bytes(key_prefix))
        self.seed = seed
        seed.update(key_bytes(data.get("key_literal")))

        self.keys = keys_values(data, "key", "keys")
        self.id_column = stringify(data.get("id_column"))
        has_keys = len(self.keys) > 0
        has_id_column = self.id_column is not None
        if not has_keys and not has_id_column:
            raise InvalidMapping("No keys or ID: %r" % name)
        if has_keys and has_id_column:
            msg = "Please use only keys or id_column, not both: %r" % name
            raise InvalidMapping(msg)

        schema_name = stringify(data.get("schema"))
        if schema_name is None:
            raise InvalidMapping("No schema: %s" % name)
        schema = model.get(schema_name)
        if schema is None:
            raise InvalidMapping("Invalid schema: %s" % schema_name)
        self.schema = schema

        # ``refs`` collects every column this mapping reads.
        self.refs = set(self.keys)
        if self.id_column:
            self.refs.add(self.id_column)
        self.dependencies: Set[str] = set()
        self.properties: List[PropertyMapping] = []
        for prop_name, prop_mapping in data.get("properties", {}).items():
            prop = self.schema.get(prop_name)
            if prop is None:
                raise InvalidMapping("Invalid property: %s" % prop_name)
            mapping = PropertyMapping(query, prop_mapping, prop)
            self.properties.append(mapping)
            self.refs.update(mapping.refs)
            if mapping.entity:
                self.dependencies.add(mapping.entity)
예제 #36
0
def get_entity_id(obj: Any) -> Optional[str]:
    """Return the stringified ID of an entity-like object.

    Mappings are asked for their ``"id"`` key, other objects for their
    ``id`` attribute; an object without one is stringified itself.
    """
    if is_mapping(obj):
        return stringify(obj.get("id"))
    return stringify(getattr(obj, "id", obj))
예제 #37
0
 def by_foreign_id(cls, foreign_id, collection_id, deleted=False):
     """Return the first entity in the collection whose ``foreign_ids``
     contain ``foreign_id``, or ``None``.

     Results are ordered by ``deleted_at`` descending with NULLs first.
     """
     foreign_id = stringify(foreign_id)
     if foreign_id is None:
         return None
     query = cls.all(deleted=deleted)
     query = query.filter(Entity.collection_id == collection_id)
     # Cast to a unicode array for the containment test.
     id_array = func.cast([foreign_id], ARRAY(db.Unicode()))
     return (
         query.filter(cls.foreign_ids.contains(id_array))
         .order_by(Entity.deleted_at.desc().nullsfirst())
         .first()
     )
예제 #38
0
파일: entity.py 프로젝트: pudo/aleph
 def to_dict(self):
     """Serialize this object via its proxy's full dict, then add the date
     fields, ``foreign_id``, the stringified ``collection_id`` and
     ``bulk=False``."""
     payload = self.to_proxy().to_full_dict()
     payload.update(self.to_dict_dates())
     payload['foreign_id'] = self.foreign_id
     payload['collection_id'] = stringify(self.collection_id)
     payload['bulk'] = False
     return payload
예제 #39
0
    def emit(self, text, type, key=None, weight=1):
        """Record a tag of the given ``type``.

        The text is cleaned by the cleaner registered for ``type``. Repeated
        calls with the same ``(key, type)`` add up their weights. When no
        usable ``key`` is given, a slug of the cleaned text is used instead.
        """
        cleaner = self.CLEANERS[type]
        text = cleaner.clean(stringify(text),
                             countries=self.document.countries)
        if text is None:
            return

        key = stringify(key)
        if key is None:
            key = slugify(text, sep='-')
        if key is None:
            return

        tag_key = (key, type)
        if tag_key in self.keyed:
            self.keyed[tag_key]['weight'] += weight
        else:
            self.keyed[tag_key] = dict(text=text, weight=weight)
예제 #40
0
파일: collection.py 프로젝트: pudo/aleph
 def team_id(self):
     """Return the stringified IDs of all non-system, non-deleted roles
     that hold a non-deleted read permission on this collection."""
     role = aliased(Role)
     perm = aliased(Permission)
     rows = (
         db.session.query(role.id)
         .filter(role.type != Role.SYSTEM)
         .filter(role.id == perm.role_id)
         .filter(perm.collection_id == self.id)
         .filter(perm.read == True)  # noqa
         .filter(role.deleted_at == None)  # noqa
         .filter(perm.deleted_at == None)  # noqa
         .all()
     )
     return [stringify(role_id) for (role_id,) in rows]
예제 #41
0
파일: fetch.py 프로젝트: pudo/fingerprints
def fetch():
    """Download the CSV at ``CSV_URL`` and write its name/abbreviation pairs
    to ``fingerprints/data/types.yml`` next to this file.

    Rows without a usable ``Name`` or ``Abbreviation`` are skipped. When a
    name shows up again with a different abbreviation, the conflict is
    printed and the later value wins.
    """
    from contextlib import closing

    out_path = os.path.dirname(__file__)
    out_path = os.path.join(out_path, 'fingerprints', 'data', 'types.yml')
    types = {}
    # Close the HTTP response as soon as the rows are read, instead of
    # leaving the handle open.
    with closing(urlopen(CSV_URL)) as response:
        for row in unicodecsv.DictReader(response):
            name = stringify(row.get('Name'))
            abbr = stringify(row.get('Abbreviation'))
            if name is None or abbr is None:
                continue
            if name in types and types[name] != abbr:
                print(name, types[name], abbr)
            types[name] = abbr

    with open(out_path, 'w') as fh:
        yaml.safe_dump({'types': types}, fh,
                       indent=2,
                       allow_unicode=True,
                       canonical=False,
                       default_flow_style=False)
예제 #42
0
파일: collection.py 프로젝트: pudo/aleph
 def to_dict(self):
     """Serialize this object to a plain dict.

     ``category`` falls back to ``self.DEFAULT`` when the stored value is
     not in ``CATEGORIES``. ``id`` and ``collection_id`` both carry the
     stringified ``self.id``.
     """
     payload = self.to_dict_dates()
     category = self.DEFAULT
     if self.category in self.CATEGORIES:
         category = self.category
     payload['category'] = category
     payload['kind'] = 'casefile' if self.casefile else 'source'
     payload['id'] = stringify(self.id)
     payload['collection_id'] = stringify(self.id)
     payload['foreign_id'] = self.foreign_id
     payload['creator_id'] = stringify(self.creator_id)
     payload['team_id'] = self.team_id
     payload['label'] = self.label
     payload['summary'] = self.summary
     payload['publisher'] = self.publisher
     payload['publisher_url'] = self.publisher_url
     payload['info_url'] = self.info_url
     payload['data_url'] = self.data_url
     payload['casefile'] = self.casefile
     payload['secret'] = self.secret
     return payload
예제 #43
0
파일: result.py 프로젝트: pudo/aleph
    def update(self):
        """Apply the outcome of the result to the document.

        Copies status, schema, metadata fields, keywords and languages from
        this result onto ``self.document``, flushes the session, and then
        saves the entity and email tags through a ``DocumentTagCollector``.
        """
        doc = self.document
        if self.status == self.STATUS_SUCCESS:
            doc.status = Document.STATUS_SUCCESS
            doc.error_message = None
        else:
            doc.status = Document.STATUS_FAIL
            doc.error_message = stringify(self.error_message)

        # Start from the generic 'Document' schema. When several flags in
        # SCHEMATA match, the last matching entry wins.
        schema = model['Document']
        for flag, name in self.SCHEMATA:
            if flag in self.flags:
                schema = model[name]

        doc.schema = schema.name
        doc.foreign_id = safe_string(self.id)
        # Most fields below prefer the value on this result and fall back to
        # what the document already has (mostly in ``doc.meta``).
        doc.content_hash = self.checksum or doc.content_hash
        doc.pdf_version = self.pdf_checksum
        doc.title = self.title or doc.meta.get('title')
        doc.file_name = self.file_name or doc.meta.get('file_name')
        doc.file_size = self.size or doc.meta.get('file_size')
        doc.summary = self.summary or doc.meta.get('summary')
        doc.author = self.author or doc.meta.get('author')
        doc.generator = self.generator or doc.meta.get('generator')
        doc.mime_type = self.mime_type or doc.meta.get('mime_type')
        doc.encoding = self.encoding or doc.meta.get('encoding')
        doc.date = self.date or doc.meta.get('date')
        doc.authored_at = self.created_at or doc.meta.get('authored_at')
        doc.modified_at = self.modified_at or doc.meta.get('modified_at')
        doc.published_at = self.published_at or doc.meta.get('published_at')
        doc.message_id = self.message_id or doc.meta.get('message_id')
        doc.in_reply_to = ensure_list(self.in_reply_to)
        doc.columns = list(self.columns.keys())
        doc.body_raw = self.body_html
        doc.body_text = self.body_text
        doc.headers = self.headers

        for kw in self.keywords:
            doc.add_keyword(safe_string(kw))
        for lang in self.languages:
            doc.add_language(safe_string(lang))

        db.session.flush()

        # Entities are emitted as person tags and emails as email tags.
        collector = DocumentTagCollector(doc, 'ingestors')
        for entity in self.entities:
            collector.emit(entity, DocumentTag.TYPE_PERSON)
        for email in self.emails:
            collector.emit(email, DocumentTag.TYPE_EMAIL)
        collector.save()
예제 #44
0
파일: role.py 프로젝트: pudo/aleph
 def to_dict(self):
     """Serialize this object to a plain dict, with ``id`` stringified and
     the remaining attributes copied unchanged."""
     payload = self.to_dict_dates()
     payload['id'] = stringify(self.id)
     payload['type'] = self.type
     payload['name'] = self.name
     payload['label'] = self.label
     payload['email'] = self.email
     payload['api_key'] = self.api_key
     payload['is_admin'] = self.is_admin
     payload['is_muted'] = self.is_muted
     payload['has_password'] = self.has_password
     # payload['notified_at'] = self.notified_at
     return payload
예제 #45
0
파일: ingest_api.py 프로젝트: pudo/aleph
def _load_metadata():
    """Read and validate the ``meta`` form field shared by all uploaded files.

    Returns ``(meta, foreign_id)``. Raises ``BadRequest`` when the field is
    not valid JSON, or when the request carries no files and no
    ``foreign_id``.
    """
    try:
        meta = json.loads(request.form.get('meta', '{}'))
    except Exception as ex:
        raise BadRequest(str(ex))

    validate_data(meta, DocumentCreateSchema)
    foreign_id = stringify(meta.get('foreign_id'))
    has_files = len(request.files) > 0
    if foreign_id is None and not has_files:
        error_body = {
            'status': 'error',
            'message': 'Directories need to have a foreign_id'
        }
        raise BadRequest(response=jsonify(error_body, status=400))
    return meta, foreign_id
예제 #46
0
 def to_proxy(self):
     """Build an entity proxy for this record.

     Records with text become ``SCHEMA_PAGE`` entities carrying the text;
     the others become ``SCHEMA_ROW`` entities whose cells follow the order
     of the document's ``columns`` metadata.
     """
     if self.text is None:
         row = model.make_entity(self.SCHEMA_ROW)
         row.make_id('record', self.id)
         row.set('table', self.document_id)
         row.set('index', self.index)
         if self.data is not None:
             # sort values by columns
             columns = self.document.meta.get('columns')
             cells = [self.data.get(column) for column in columns]
             row.set('cells', registry.json.pack(cells))
         return row

     page = model.make_entity(self.SCHEMA_PAGE)
     page.make_id('record', self.id)
     page.set('document', self.document_id)
     page.set('index', self.index)
     page.set('bodyText', stringify(self.text))
     return page
예제 #47
0
파일: alert.py 프로젝트: pudo/aleph
 def create(cls, data, role_id):
     """Create an instance for ``role_id`` from the stringified ``query`` in
     ``data``, call its ``update()`` and return it."""
     instance = cls()
     instance.role_id = role_id
     raw_query = data.get('query')
     instance.query = stringify(raw_query)
     instance.update()
     return instance