Beispiel #1
0
    def run(self, job: Job):
        """Copy the source file of the current step into the cut storage.

        The step's ``source_url`` is resolved to a file record. Only URLs
        with the ``card`` scheme are accepted. The copy is written into the
        cut storage root under a freshly generated hex UUID name, and that
        path is stored as the step result.

        Raises:
            Exception: if the source URL does not use the ``card`` scheme.
        """
        logging.debug(job.to_json())

        step = job.get_current_step()
        system = current_system()

        record = get_file_by_url(step.options.source_url)
        parsed = record.parsed_url
        origin = parsed.path

        storage = system.cut_storage
        token = uuid.uuid4().hex
        destination = os.path.join(resolve_path(storage.root_path), token)

        # Guard clause: anything that is not a card URL is rejected.
        if parsed.scheme != 'card':
            raise Exception("Only support card here")

        root_override = step.options.source_root_path
        if root_override is not None:
            # Drop the leading character of the URL path (the slash) so the
            # join stays below the given root.
            origin = os.path.join(root_override, origin[1:])

        FileCopy(
            source=origin,
            destination=destination,
            link=False,
            remove_source=False,
        ).run()

        step.result = ToCutResult(path=destination)
        step.status = StepStatus.done
Beispiel #2
0
    def run(self, job: Job):
        """Remove the temporary file produced by the job's ``to_cut`` step.

        Cleanup is best-effort: a missing ``to_cut`` step, a step without a
        result or path, and a file that is already gone are all treated as
        "nothing to do", so the step still finishes as done.
        """
        logging.debug(job.to_json())

        step = job.get_current_step()
        cut = job.get_step('to_cut')

        # The to_cut step may be absent or may never have produced a result.
        cut_result = cut.result if cut is not None else None
        source_path = cut_result.path if cut_result is not None else None

        if source_path:
            # EAFP: deleting directly avoids the race between an existence
            # check and the actual removal.
            try:
                os.remove(source_path)
            except FileNotFoundError:
                logging.debug("Cut file %s was already removed", source_path)

        step.result = CleanCutResult()
        step.status = StepStatus.done
Beispiel #3
0
    def run(self, job: Job):
        """Hash the step's file and store the value as the step result.

        The file is taken from the step's ``path`` option when it is set;
        otherwise the path recorded by the job's ``to_cut`` step is used.
        """
        logging.debug(job.to_json())

        step = job.get_current_step()

        target = step.options.path
        if target is None:
            # No explicit path: fall back to the output of the to_cut step.
            target = job.get_step('to_cut').result.path

        calculated = calculate_hash(target)

        step.result = CalculateHashResult(calculated_hash=calculated)
        step.status = StepStatus.done
Beispiel #4
0
    def run_scan(self, name, root_path):
        """Scan ``root_path`` for new files of a source and yield import jobs.

        Paths yielded by the folder scanner are grouped by their path without
        the extension, so files that differ only by extension are registered
        together. Files whose URL is already known to the system are skipped.

        Args:
            name: Name of the source to scan, matched against
                ``self.sources``.
            root_path: Directory handed to the folder scanner; scanned paths
                are joined onto it to locate each file.

        Yields:
            One ``Job`` holding a single ``Register`` step per group of
            files. Nothing is yielded when no source named ``name`` exists.
        """
        source = next((s for s in self.sources if s.name == name), None)
        if source is None:
            logging.debug("No source for '%s'", name)
            return

        system = current_system()

        # The position of an extension in the source's list decides the
        # order of the parts within one group.
        prios = {ext: n for n, ext in enumerate(source.extension)}

        def prio(file_path):
            ext = os.path.splitext(file_path)[1][1:].lower()
            # Extensions missing from the table sort last instead of
            # raising KeyError.
            return prios.get(ext, len(prios))

        # Scan the root path for files matching the filter
        max_groups = 100
        scanner = FolderScanner(root_path, extensions=source.extension)
        collected = {}
        for file_path in scanner.scan():
            if '.' not in file_path:
                continue

            url = source.get_path_url(file_path)
            if system.select('file').has(url):
                logging.info('Has %s already', url)
                continue

            stem, _ = os.path.splitext(file_path)
            collected.setdefault(stem, []).append(file_path)

            # Cap the amount of work done by a single scan.
            if len(collected) >= max_groups:
                break

        # Note: the count logged here is the number of groups (stems).
        logging.info('Collected %d files.', len(collected))

        # Create entries and import jobs for each found file
        for stem in sorted(collected):
            file_paths = collected[stem]
            logging.debug("Importing %s", ' + '.join(file_paths))

            parts = []
            for file_path in sorted(file_paths, key=prio):
                full_path = os.path.join(root_path, file_path)
                mime_type, is_raw = guess_mime_type(full_path)

                parts.append(RegisterPart(
                    server=system.hostname,
                    source=source.name,
                    root_path=root_path,
                    path=file_path,
                    is_raw=is_raw,
                    mime_type=mime_type,
                ))

            yield Job(
                steps=[
                    Register.AsStep(
                        parts=parts,
                    )
                ]
            )
Beispiel #5
0
    def run(self, job: Job):
        """Read metadata from the step's file and merge it into its entry.

        The file path comes from the step's ``path`` option or, when that is
        unset, from the job's ``to_cut`` step. If no analyser exists for the
        step's mime type the step is marked done without a result. Otherwise
        the analysed metadata is stored on the entry (when both metadata and
        an ``entry_id`` are available) and returned as the step result.
        """
        logging.debug(job.to_json())

        step = job.get_current_step()

        path = step.options.path
        if path is None:
            path = job.get_step('to_cut').result.path

        analyse = get_analyser(step.options.mime_type)
        if analyse is None:
            logging.info("Found no metadata analyser for %s",
                         step.options.mime_type)
            step.status = StepStatus.done
            return

        metadata = analyse(path)

        nothing_to_store = metadata is None or step.options.entry_id is None
        if not nothing_to_store:

            @retry()
            def merge_into_entry():
                # The entry is re-read inside the retried function so each
                # attempt works on the current state.
                current = get_entry_by_id(step.options.entry_id)
                if current.metadata is not None:
                    current.metadata.merge(metadata)
                else:
                    current.metadata = metadata
                update_entry_by_id(current.id, current)

            merge_into_entry()

        step.result = ReadMetadataResult(metadata=metadata)
        step.status = StepStatus.done
Beispiel #6
0
    def run(self, job: Job):
        """Link the step's source file into main storage and register it.

        The destination path is built as
        ``<main root>/<entry type>/<purpose>/<date>/<filename>``. The date is
        the first ten characters of the metadata's ``taken_ts`` or, when that
        is unavailable, the modification date of the source file. After the
        file is linked, a managed ``File`` is created and a matching
        ``FileReference`` is appended to the entry.

        Raises:
            AssertionError: if no source path or no reference can be
                determined from the step options or earlier steps.
        """
        logging.debug(job.to_json())

        step = job.get_current_step()
        entry = get_entry_by_id(step.options.entry_id)
        source = get_file_by_url(step.options.source_url)

        source_path = step.options.path
        if source_path is None:
            cut = job.get_step('to_cut')
            cut_result = cut.result if cut is not None else None
            source_path = cut_result.path if cut_result is not None else None

        # Raised explicitly (instead of ``assert``) so the check survives
        # ``python -O`` while keeping the exception type.
        if not source_path:
            raise AssertionError("Missing source path (forgot to_cut step?)")

        reference = step.options.reference
        if reference is None:
            hash_step = job.get_step('calculate_hash')
            hash_result = hash_step.result if hash_step is not None else None
            reference = (hash_result.calculated_hash
                         if hash_result is not None else None)

        # The original check tested source_path a second time, so a missing
        # reference went unnoticed.
        if not reference:
            raise AssertionError(
                "Missing reference (forgot calculate_hash step?)")

        # Prefer metadata read earlier in this job; otherwise use whatever
        # is already stored on the entry.
        metadata_step = job.get_step('read_metadata')
        metadata_result = (metadata_step.result
                           if metadata_step is not None else None)
        if metadata_result is not None and metadata_result.metadata is not None:
            metadata = metadata_result.metadata
        else:
            metadata = entry.metadata

        taken_ts = getattr(metadata, 'taken_ts', None)
        if taken_ts is not None:
            # Only the leading ten characters are used for the folder name.
            taken_ts = taken_ts[:10]
        else:
            taken_ts = datetime.datetime.fromtimestamp(
                os.path.getmtime(source_path)).strftime('%Y-%m-%d')

        system = current_system()

        file_ref = next(
            (fr for fr in entry.files if fr.reference == source.reference),
            None)
        purpose = file_ref.purpose if file_ref is not None else FilePurpose.unknown
        # Without an existing reference on the entry, fall back to defaults.
        # NOTE(review): version 0 is assumed to be the initial version;
        # confirm.
        version = file_ref.version if file_ref is not None else 0
        ref_mime_type = (file_ref.mime_type
                         if file_ref is not None else source.mime_type)

        main_root = resolve_path(system.main_storage.root_path)
        filename = os.path.basename(step.options.source_url)
        main_path = os.path.join(
            main_root, entry.type.value, purpose.value, taken_ts, filename)

        logging.info("Source: %s", str(source.reference))
        logging.info("Main path: %s", str(main_path))

        filecopy = FileCopy(
            source=source_path,
            destination=main_path,
            link=True,
            remove_source=False,
        )
        filecopy.run()

        @retry()
        def push():
            # Re-read the entry on every attempt so the update is based on
            # its current state.
            fresh_entry = get_entry_by_id(step.options.entry_id)
            new_file_ref = FileReference(
                purpose=purpose,
                version=version,
                reference=reference,
                mime_type=ref_mime_type,
            )
            new_file = File(
                reference=reference,
                url=system.main_storage.get_file_url(main_path),
                mime_type=source.mime_type,
                status=FileStatus.managed,
            )
            create_file(new_file)

            fresh_entry.files.append(new_file_ref)
            update_entry_by_id(fresh_entry.id, fresh_entry)

        push()

        step.result = ToMainResult()
        step.status = StepStatus.done
Beispiel #7
0
    def run(self, job: Job):
        """Transcode proxy, thumb and check files and attach them to the entry.

        The input file comes from the step's ``path`` option or, when that is
        unset, from the job's ``to_cut`` step. If no ``proxy`` transcoder
        exists for the source mime type the step is marked done without a
        result. Otherwise one file per purpose is produced and a
        ``FileReference`` for each is appended to the entry.
        """
        logging.debug(job.to_json())

        step = job.get_current_step()
        entry = get_entry_by_id(step.options.entry_id)
        source = get_file_by_url(step.options.source_url)
        source_ref = entry.get_file_reference(source.reference)

        source_path = step.options.path
        if source_path is None:
            source_path = job.get_step('to_cut').result.path

        transcoder = get_transcoder(source.mime_type, 'proxy')
        if transcoder is None:
            step.status = StepStatus.done
            return

        # One transcoding target per derived purpose, all from the same
        # input file.
        targets = [
            transcoder.Options(entry=entry,
                               cut_source=source_path,
                               purpose=purpose)
            for purpose in (FilePurpose.proxy, FilePurpose.thumb,
                            FilePurpose.check)
        ]

        def transcode(target):
            produced = transcoder.run(target)
            return FileReference(
                purpose=target.purpose,
                version=source_ref.version,
                reference=produced.reference,
                mime_type=produced.mime_type,
            )

        filerefs = [transcode(target) for target in targets]

        logging.info(filerefs)

        @retry()
        def store_references():
            # Re-read the entry on every attempt and append fresh copies of
            # the references.
            current = get_entry_by_id(step.options.entry_id)

            for ref in filerefs:
                current.files.append(
                    FileReference(
                        purpose=ref.purpose,
                        version=ref.version,
                        reference=ref.reference,
                        mime_type=ref.mime_type,
                    ))

            update_entry_by_id(current.id, current)

        store_references()

        step.result = CreateProxyResult()
        step.status = StepStatus.done
Beispiel #8
0
    def register_parts(self):
        """Register this step's parts as files, create an entry and queue jobs.

        Every part in ``self.options.parts`` is stored as a ``File`` with
        status ``new``. If a file with the same URL already exists, the
        existing record is used and an entry referencing it is looked up.
        Parts are classified as raw, original or derivative. When no entry
        exists yet, a new image entry is created with one ``FileReference``
        per classified file, and one import job per file is sent to the job
        queue. When an entry already exists nothing further is done.
        """
        raw = None
        original = None
        derivative = None
        entry = None
        source = None
        root_path = None

        for part in self.options.parts:
            url = part.get_url(self.system)
            # The first non-empty root path among the parts is used for all
            # jobs.
            root_path = root_path or part.root_path
            candidate = File(url=url,
                             reference=url,
                             status=FileStatus.new,
                             mime_type=part.mime_type)

            try:
                registered = create_file(candidate)
            except Conflict:
                # The URL is already registered: reuse the stored file and
                # look for an entry that references it.
                registered = get_file_by_url(url)
                if registered is not None and registered.reference is not None:
                    entry = next(
                        iter(get_entries_by_reference(
                            registered.reference).entries),
                        None)

            if registered is None:
                # Log the URL: there is no file object to serialise here.
                logging.error('Bad file: %s', url)
                continue

            if part.is_raw:
                raw = registered
                source = part.source
            elif raw is None:
                original = registered
                source = part.source
            else:
                derivative = registered

        primary = raw or original or derivative

        if primary is None:
            logging.error('No valid file!\n%s', self.step.to_json())
            return

        if entry is None:
            entry = Entry(
                type=EntryType.image,
                metadata=DefaultEntryMetadata(
                    original_filename=os.path.basename(primary.url),
                    source=source,
                ),
            )
            entry = create_entry(entry)

            classified = (
                (raw, FilePurpose.raw),
                (original, FilePurpose.original),
                (derivative, FilePurpose.derivative),
            )
            jobs = []
            for file_, purpose in classified:
                if file_ is None:
                    continue

                entry.files.append(
                    FileReference(
                        reference=file_.reference,
                        purpose=purpose,
                        version=0,
                        mime_type=file_.mime_type,
                    ))

                jobs.append(
                    Job(steps=[
                        ToCut.AsStep(
                            source_root_path=root_path,
                            source_url=file_.url,
                        ),
                        CalculateHash.AsStep(),
                        ReadMetadata.AsStep(
                            entry_id=entry.id,
                            mime_type=file_.mime_type,
                        ),
                        ToMain.AsStep(
                            entry_id=entry.id,
                            source_url=file_.url,
                        ),
                        CreateProxy.AsStep(
                            entry_id=entry.id,
                            source_url=file_.url,
                        ),
                        CleanCut.AsStep(),
                    ]))

            # Persist the references before the jobs that depend on them
            # are queued.
            update_entry_by_id(entry.id, entry)

            with QueueClient('ipc://job_queue') as q:
                for job in jobs:
                    q.send(job)