def run(self, job: Job):
    """Copy the job's source file from a card into cut storage.

    Only 'card' URLs are supported.  The resulting cut path is stored
    on the step result and the step is marked done.
    """
    logging.debug(job.to_json())
    step = job.get_current_step()
    system = current_system()

    source = get_file_by_url(step.options.source_url)
    source_url = source.parsed_url
    source_path = source_url.path

    # Destination: a random filename inside the cut storage area.
    cut = system.cut_storage
    cut_filename = uuid.uuid4().hex
    cut_path = os.path.join(resolve_path(cut.root_path), cut_filename)

    if source_url.scheme == 'card':
        if step.options.source_root_path is not None:
            # Drop the leading '/' so the path joins under the card root.
            source_path = os.path.join(
                step.options.source_root_path, source_path[1:])
    else:
        raise Exception("Only support card here")

    FileCopy(
        source=source_path,
        destination=cut_path,
        link=False,
        remove_source=False,
    ).run()

    step.result = ToCutResult(path=cut_path)
    step.status = StepStatus.done
def run(self, job):
    """Execute the transfer steps listed in the job options, in order.

    An unknown step name, or a GiveUp raised by a handler, aborts the
    remaining steps.
    """
    logging.info('Starting transfer')
    assert job is not None, "Job can't be None"
    assert job.options is not None, "Job Options can't be None"
    logging.info('Job\n%s', job.to_json())

    self.system = current_system()
    self.options = job.options
    self.source = get_file_by_url(self.options.source_url)
    self.cut_path = None
    self.reference = None

    # Dispatch table: step name -> bound handler method.
    handlers = {
        'extract_hash': self.extract_hash,
        'extract_metadata': self.extract_metadata,
        'to_main': self.to_main,
        'create_proxy': self.create_proxy,
    }

    for name in self.options.steps:
        handler = handlers.get(name)
        if handler is None:
            logging.error('No such step: %s', name)
            break
        try:
            handler()
        except GiveUp:
            logging.error('Gave up during step: %s', name)
            break
def run(self, options: ImageProxyOptions):
    """Generate a derived image file (thumb, proxy or check) for an entry.

    Returns the created File product, or None when the purpose requires
    no product.  The create step is retried via @retry().
    """
    logging.info('Starting image transaction.')
    assert options is not None, "Options can't be None"
    logging.info('Options\n%s', options.to_json())

    entry = options.entry
    logging.info('Entry\n%s', entry.to_json())

    self.system = current_system()
    self.full_original_file_path = options.cut_source
    logging.info('Full original file path is %s.',
                 self.full_original_file_path)

    # Orientation metadata is optional; default to no rotation/mirroring.
    # (getattr with a default replaces the hasattr/else chains.)
    angle = getattr(entry.metadata, 'angle', 0)
    mirror = getattr(entry.metadata, 'mirror', 0)

    product = None
    if options.purpose in (FilePurpose.thumb, FilePurpose.proxy):
        # Thumb and proxy differ only in target size.
        settings = self.system.config.get_job_settings('import')
        size = (settings.thumb_size
                if options.purpose == FilePurpose.thumb
                else settings.proxy_size)
        product = self.generate_rescaled(
            entry.type.value,
            options.purpose,
            size,
            angle,
            mirror,
        )
    elif options.purpose == FilePurpose.check:
        product = self.create_check(entry.type.value, angle, mirror)

    if product is None:
        logging.info("Nothing to do.")
        return

    @retry()
    def create():
        create_file(product)
    create()

    logging.info("Generated:\n" + product.to_json())
    return product
def do(self):
    """Scan devices mounted under the mount root for a '<system>.images6'
    marker file, and yield import job requests for each source found.

    The marker file's first line names the source to import from.
    """
    logging.info('Started scanning...')

    # Hoist the system lookup: it is invariant, but was previously
    # re-fetched on every loop iteration.
    system = current_system()
    mount_root = system.server.mount_root

    # Look for any device mounted under mount root, having a
    # file <system>.images6
    pre_scanner = FolderScanner(mount_root, extensions=['images6'])
    wanted_filename = '.'.join([system.name, 'images6'])

    for file_path in pre_scanner.scan():
        file_path = os.path.join(mount_root, file_path)
        logging.debug("Found file '%s'", file_path)
        if os.path.basename(file_path) != wanted_filename:
            continue
        with open(file_path) as f:
            name = f.readlines()[0].strip()
        path = os.path.dirname(file_path)
        logging.info('Importing from %s (%s)', path, name)
        yield from self.run_scan(name, path)
def run_scan(self, name, root_path):
    """Scan root_path for files belonging to source `name`, group them
    by filename stem, and yield one Register job per group.

    Collects at most 100 distinct stems per invocation; files already
    known to the system are skipped.
    """
    # Scan the root path for files matching the filter
    system = current_system()

    source = next((t for t in self.sources if t.name == name), None)
    if source is None:
        # BUG FIX: previously logged the literal None instead of `name`.
        logging.debug("No source for '%s'", name)
        return

    # Lower index in source.extension = higher import priority.
    prios = {x: n for (n, x) in enumerate(source.extension)}

    def prio(x):
        return prios[os.path.splitext(x)[1][1:].lower()]

    scanner = FolderScanner(root_path, extensions=source.extension)
    collected = {}
    for file_path in scanner.scan():
        if '.' not in file_path:
            continue
        url = source.get_path_url(file_path)
        if system.select('file').has(url):
            logging.info('Has %s already', url)
            continue
        stem, _ = os.path.splitext(file_path)
        collected.setdefault(stem, []).append(file_path)
        if len(collected) >= 100:
            break

    logging.info('Collected %d files.', len(collected))

    # Create entries and import jobs for each found file
    for _, file_paths in sorted(collected.items(), key=lambda x: x[0]):
        logging.debug("Importing %s", ' + '.join(file_paths))
        parts = []
        for file_path in sorted(file_paths, key=prio):
            full_path = os.path.join(root_path, file_path)
            mime_type, is_raw = guess_mime_type(full_path)
            parts.append(RegisterPart(
                server=system.hostname,
                source=source.name,
                root_path=root_path,
                path=file_path,
                is_raw=is_raw,
                mime_type=mime_type,
            ))
        yield Job(
            steps=[
                Register.AsStep(
                    parts=parts,
                )
            ]
        )
def get_file_by_reference(reference):
    """Look up a File by its content reference; return None if absent."""
    rows = current_system().select('file').view(
        'by_reference', include_docs=True, key=reference)
    row = next(rows, None)
    return File.FromDict(row['doc']) if row is not None else None
def get_entry_by_source(folder, filename) -> Entry:
    """Return the Entry registered under (folder, filename), or None."""
    rows = current_system().db['entry'].view(
        'by_source',
        key=(folder, filename),
        include_docs=True,
    )
    first = next(iter(rows), None)
    if first is None:
        return None
    return Entry.FromDict(first['doc'])
def get_entries_by_reference(reference):
    """Return an EntryFeed of every entry referencing the given file."""
    rows = current_system().select('entry').view(
        'by_file_reference', reference, include_docs=True)
    entries = []
    for row in rows:
        entry = Entry.FromDict(row.get('doc'))
        entry.calculate_urls()
        entries.append(entry)
    return EntryFeed(
        count=len(entries),
        entries=entries,
    )
def run(self, job):
    """Register the import parts described by the current step's options."""
    logging.info('Starting register import.')
    assert job is not None, "Job can't be None"
    logging.info('Job\n%s', job.to_json())

    step = job.get_current_step()
    self.step = step
    self.system = current_system()
    self.options = step.options

    self.register_parts()

    step.status = StepStatus.done
    return step
def create_file(f):
    """Persist a File document and return the stored File."""
    saved = current_system().select('file').save(f.to_dict())
    return File.FromDict(saved)
def get_urls_by_reference(reference):
    """Return an iterator of parsed URLs for files with this reference.

    The view is queried eagerly; only the URL parsing is lazy.
    """
    rows = current_system().select('file').view(
        'by_reference', include_docs=False, key=reference)
    return (urlparse(row['_id']) for row in rows)
def get_file_by_url(url):
    """Fetch the File stored under the given URL key."""
    doc = current_system().select('file')[url]
    return File.FromDict(doc)
def run(self, job: Job):
    """Move a cut file into main storage and attach it to its entry.

    Resolves the source path and content reference from the step
    options or from the earlier 'to_cut' / 'calculate_hash' steps,
    hard-links the file into the main storage tree under
    <type>/<purpose>/<taken date>/<filename>, then records the new
    File and FileReference on the entry (with retry).
    """
    logging.debug(job.to_json())
    step = job.get_current_step()
    entry = get_entry_by_id(step.options.entry_id)
    source = get_file_by_url(step.options.source_url)

    # Source path: explicit option, or the result of the to_cut step.
    if step.options.path is not None:
        source_path = step.options.path
    else:
        cut = job.get_step('to_cut')
        source_path = cut.result.path
    assert source_path, "Missing source path (forgot to_cut step?)"

    # Reference: explicit option, or the result of the hash step.
    if step.options.reference is not None:
        reference = step.options.reference
    else:
        calculate_hash = job.get_step('calculate_hash')
        reference = calculate_hash.result.calculated_hash
    # BUG FIX: this assert previously re-checked source_path, so a
    # missing reference was never caught; the message also named a
    # nonexistent 'calculate_reference' step.
    assert reference, "Missing reference (forgot calculate_hash step?)"

    # Metadata: prefer the read_metadata step result, fall back to the
    # entry's own metadata.
    metadata_step = job.get_step('read_metadata')
    if metadata_step.result is not None and metadata_step.result.metadata is not None:
        metadata = metadata_step.result.metadata
    else:
        metadata = entry.metadata

    # Taken date (YYYY-MM-DD): metadata timestamp if present, otherwise
    # the source file's modification time.
    if metadata is not None and getattr(metadata, 'taken_ts', None) is not None:
        taken_ts = metadata.taken_ts[:10]
    else:
        taken_ts = datetime.datetime.fromtimestamp(
            os.path.getmtime(source_path)).strftime('%Y-%m-%d')

    system = current_system()
    file_ref = next(
        (fr for fr in entry.files if fr.reference == source.reference),
        None)
    purpose = file_ref.purpose if file_ref is not None else FilePurpose.unknown

    main_root = resolve_path(system.main_storage.root_path)
    filename = os.path.basename(step.options.source_url)
    main_path = os.path.join(
        main_root, entry.type.value, purpose.value, taken_ts, filename)

    logging.info("Source: %s", str(source.reference))
    logging.info("Main path: %s", str(main_path))

    filecopy = FileCopy(
        source=source_path,
        destination=main_path,
        link=True,
        remove_source=False,
    )
    filecopy.run()

    @retry()
    def push():
        entry = get_entry_by_id(step.options.entry_id)
        # NOTE(review): file_ref may be None here (purpose defaulted to
        # unknown above), which would raise AttributeError — confirm a
        # matching file reference is guaranteed at this point.
        new_file_ref = FileReference(
            purpose=file_ref.purpose,
            version=file_ref.version,
            reference=reference,
            mime_type=file_ref.mime_type,
        )
        new_file = File(
            reference=reference,
            url=system.main_storage.get_file_url(main_path),
            mime_type=source.mime_type,
            status=FileStatus.managed,
        )
        create_file(new_file)
        entry.files.append(new_file_ref)
        update_entry_by_id(entry.id, entry)
    push()

    step.result = ToMainResult()
    step.status = StepStatus.done
def run(self):
    """Register the trig_import handler on the system's ZMQ REP socket."""
    current_system().zmq_rep_lazy_pirate('trig_import', self.trig_import)
def __init__(self):
    """Collect import sources: all cards plus drops hosted on this server."""
    system = current_system()
    # Plain copy — the comprehension added nothing for cards.
    cards = list(system.config.cards)
    # Only drops assigned to this host.
    drops = [drop for drop in system.config.drops
             if drop.server == system.hostname]
    self.sources = cards + drops
def get_entry_by_id(id) -> Entry:
    """Fetch an Entry by id and populate its computed URLs."""
    data = current_system().db['entry'][id]
    entry = Entry.FromDict(data)
    entry.calculate_urls()
    return entry
def update_entry_by_id(id, entry) -> Entry:
    """Save `entry` under `id` and return the stored version."""
    entry.id = id
    logging.debug('Updating entry to\n%s', entry.to_json())
    saved = current_system().db['entry'].save(entry.to_dict())
    return Entry.FromDict(saved)
def delete_entry_by_id(id):
    """Remove the entry document with the given id."""
    current_system().db['entry'].delete(id)
def create_entry(ed):
    """Persist a new entry, assigning a random id when none is set."""
    if ed.id is None:
        ed.id = uuid.uuid4().hex
    logging.debug('Create entry\n%s', ed.to_json())
    saved = current_system().select('entry').save(ed.to_dict())
    return Entry.FromDict(saved)
def trig_import():
    """Forward a trig_import request over ZMQ and report success."""
    logging.info("Start")
    current_system().zmq_req_lazy_pirate('trig_import')
    logging.info("Stop")
    return {'result': 'ok'}
def get_entries(query=None):
    """Return an EntryFeed of entries filtered by state or taken date.

    With no query: the first 500 entries ordered by taken timestamp.
    A query object may page (offset/page_size), filter by state, or
    select a single day ('today' or 'YYYY-MM-DD' plus a day delta),
    optionally reversing the returned order.
    """
    if query is None:
        offset = 0
        page_size = 500
        date = None
        state = None
        delta = 0
        reverse = False
    else:
        offset = query.offset
        page_size = query.page_size
        date = query.date
        delta = query.delta
        reverse = query.reverse
        state = query.state

    if state is not None:
        # NOTE(review): the builtin `any` is used as an endkey sentinel
        # that apparently sorts after every real key (CouchDB-style
        # "high key") — confirm against the view's collation rules.
        entry_data = current_system().db['entry'].view(
            'by_state_and_taken_ts',
            startkey=(state.value, None),
            endkey=(state.value, any),
            include_docs=True,
            skip=offset,
            limit=page_size,
        )
    elif date is not None:
        if date == 'today':
            date = datetime.date.today()
        else:
            # 'YYYY-MM-DD' -> datetime.date (split limited to 3 parts).
            date = (int(part) for part in date.split('-', 2))
            date = datetime.date(*date)
        # Shift the requested day by the delta (e.g. for prev/next-day
        # navigation).
        date += datetime.timedelta(days=delta)
        entry_data = current_system().db['entry'].view(
            'by_taken_ts',
            startkey=(date.year, date.month, date.day),
            endkey=(date.year, date.month, date.day, any),
            include_docs=True,
            skip=offset,
            limit=page_size,
        )
    else:
        entry_data = current_system().db['entry'].view(
            'by_taken_ts',
            include_docs=True,
            skip=offset,
            limit=page_size,
        )

    entries = [Entry.FromDict(entry.get('doc')) for entry in entry_data]
    for entry in entries:
        entry.calculate_urls()

    return EntryFeed(
        date=(date.isoformat() if date else None),
        state=state,
        count=len(entries),
        offset=offset,
        entries=entries if not reverse else list(reversed(entries)),
    )