Example #1
        self.exporting = False
        self.last_export_failed = None


defaults = {o.name: o.value if o.argcount else None
            for o in parse_defaults(clidoc)}
args = {**defaults, 'export': True, '--jobs': 1, 'schemas': False, 'protcur': False,
        '--no-network': True,  # XXX FIXME we need a way to fetch the data once and then reuse
        '--i-know-what-i-am-doing': True,
        'report': False, 'protocols': False,}  # FIXME separate args for protcur export
options = Options(args, defaults)

project_id = auth.get('remote-organization')
path_source_dir = Path('~/files/sparc-datasets-test').expanduser().resolve()  # FIXME hardcoded  XXX resolve required to avoid mismatches
if not path_source_dir.exists():
    path_source_dir.mkdir(parents=True)

cel = Celery('sparcur-cron')

cel.conf.worker_hijack_root_logger = False
cel.conf.worker_prefetch_multiplier = 1

log.info(f'STATUS sparcur :id {project_id} :path {path_source_dir}')

# FIXME need a dedicated worker for the cron queue
cel.conf.task_queues = (
    Queue('cron', Exchange('cron'), routing_key='task.cron',
          #max_priority=100,
          queue_arguments={'x-max-priority': 10},
          ),
    Queue('export', Exchange('export'), routing_key='task.export',
          # assumed completion: the original snippet truncates here; mirroring
          # the cron queue above with a priority cap is a guess, not the source
          queue_arguments={'x-max-priority': 1},
          ),
)
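
With the queues declared this way, a task can be routed at call time with an
explicit priority capped by the queue's x-max-priority. A minimal sketch,
assuming a hypothetical task name and argument (neither is from the source):

# hypothetical task; only the Celery routing/priority kwargs are standard API
@cel.task(name='tasks.export_dataset')
def export_dataset(dataset_id):
    return dataset_id

# route to the 'export' queue; priority must not exceed the queue's x-max-priority
export_dataset.apply_async(('dataset-id-here',), queue='export', priority=1)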
Example #2
    @hasSchema.f(sc.SummarySchema, fail=True)
    def data(self, timestamp=None):
        data = self._pipeline_end(timestamp)
        return data  # FIXME we want objects that wrap the output rather than generate it ...

    @hasSchema.f(sc.SummarySchema, fail=True)
    def data_for_export(self, timestamp):
        data = self._pipeline_end(timestamp)
        # NOTE this timestamps the cached data AS INTENDED
        data['prov']['timestamp_export_start'] = timestamp
        return data


_p = Path(tempfile.gettempdir()) / 'asdf'
_p.mkdir(exist_ok=True)  # FIXME XXX hardcoded scratch directory


def datame(d,
           ca,
           timestamp,
           helpers=None,
           log_level=logging.INFO,
           dp=_p,
           evil=[False],
           dumb=False):
    """ sigh, pickles """
    log_names = ('sparcur', 'idlib', 'protcur', 'orthauth', 'ontquery',
                 'augpathlib', 'pyontutils')
    for log_name in log_names:
        log = logging.getLogger(log_name)
        # assumed continuation: the snippet truncates here; the log_level
        # parameter implies the level is applied to each named logger
        log.setLevel(log_level)
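
The "sigh, pickles" docstring and the flat, picklable signature suggest datame
is shipped to worker processes, where logger state must be rebuilt. A sketch of
one way such a dispatch could look; the driver function is hypothetical, not
from the source:

from concurrent.futures import ProcessPoolExecutor

def run_datasets(datasets, ca, timestamp):
    # hypothetical driver: each datame call reconfigures logging in its own process
    with ProcessPoolExecutor() as executor:
        futures = [executor.submit(datame, d, ca, timestamp) for d in datasets]
        return [f.result() for f in futures]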
Example #3
def write_graphs(sgs, path=None):
    if path is None:
        path = Path(tempfile.gettempdir()) / 'protcur-individual'

    pp = path / 'published'
    hpath = path / 'html'
    hpp = hpath / 'published'
    opath = path / 'org'
    opp = opath / 'published'
    # parents are listed before children so a bare mkdir always succeeds
    for p in (path, pp, hpath, hpp, opath, opp):
        if not p.exists():
            p.mkdir()

    for wg in sgs:
        u = next(wg[:rdf.type:sparc.Protocol])
        published = bool(list(wg[u:TEMP.datasetPublishedDoi:]))
        try:
            pid = idlib.Pio(u)
            base = 'pio-' + pid.identifier.suffix
        except idlib.exc.IdlibError:
            pid = None
            base = (u
                    .replace('http://', '')
                    .replace('https://', '')
                    .replace('/', '_')
                    .replace('.', '_'))

        name = base + '.ttl'
        hname = base + '.html'
        oname = base + '.org'

        if published:
            wt_path = pp / name
            wh_path = hpp / hname
            wo_path = opp / oname
        else:
            wt_path = path / name
            wh_path = hpath / hname
            wo_path = opath / oname

        wg.write(wt_path)
        write_html(wg, wh_path)

        if pid is None:
            org = None
        else:
            #if wo_path.exists(): continue  # XXX remove after testing complete
            try:
                org = pid.asOrg()
            except idlib.exc.IdlibError:
                org = None

        if org is not None:
            with open(wo_path, 'wt') as f:
                f.write(org)
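
A minimal usage sketch, assuming sgs is an iterable of rdflib graphs that
support the wg[s:p:o] slice syntax used above; protocol_graphs and the output
path are illustrative, not from the source:

# hypothetical call; protocol_graphs would come from the protcur pipeline
out = Path(tempfile.gettempdir()) / 'protcur-individual'
write_graphs(protocol_graphs, path=out)
# published graphs land under out/'published', html under out/'html', org under out/'org'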