Exemple #1
0
    def run(self):
        """Sync each entry of ``self._sync_list``.

        An entry may be a directory or a file path.  A directory is resolved
        to the file inside it whose name ends with ``WANDB_SUFFIX``; when no
        tensorboard root was found, the directory is skipped if
        ``check_and_warn_old`` flags it or if it does not contain exactly one
        such file.

        When ``_setup_tensorboard`` reports that tensorboard data should be
        synced, the data is handed to ``_send_tensorboard`` and the entry is
        done.  Otherwise the records of the ``.wandb`` file are scanned and
        sent one by one, and a marker file (``sync_item + SYNCED_SUFFIX``) is
        written afterwards if marking is enabled, view mode is off and an
        exit record was seen.
        """
        for sync_item in self._sync_list:
            found = self._find_tfevent_files(sync_item)
            tb_event_files, tb_logdirs, tb_root = found

            if os.path.isdir(sync_item):
                entries = os.listdir(sync_item)
                wandb_files = [
                    name for name in entries if name.endswith(WANDB_SUFFIX)
                ]
                # Without tensorboard data the directory is only usable when
                # it holds exactly one current-format .wandb file.
                if tb_root is None:
                    if check_and_warn_old(entries) or len(wandb_files) != 1:
                        print("Skipping directory: {}".format(sync_item))
                        continue
                if wandb_files:
                    sync_item = os.path.join(sync_item, wandb_files[0])

            sync_tb = self._setup_tensorboard(
                tb_root, tb_logdirs, tb_event_files, sync_item
            )
            if sync_tb:
                # Tensorboard syncs use a tmp dir for images etc.
                tb_sender = sender.SendManager.setup(TMPDIR.name)
                self._send_tensorboard(tb_root, tb_logdirs, tb_sender)
                continue

            send_manager = sender.SendManager.setup(os.path.dirname(sync_item))

            store = datastore.DataStore()
            try:
                store.open_for_scan(sync_item)
            except AssertionError as err:
                print(
                    ".wandb file is empty ({}), skipping: {}".format(
                        err, sync_item
                    )
                )
                continue

            # The exit record is threaded through _parse_pb between reads.
            exit_pb = None
            finished = False
            url_shown = False
            while True:
                raw = self._robust_scan(store)
                if raw is None:
                    break
                record, exit_pb, skip = self._parse_pb(raw, exit_pb)
                # Seeing an exit record at any point means the run finished.
                finished = finished or exit_pb is not None
                if skip:
                    continue

                send_manager.send(record)
                # Flush records that the previous send queued up.
                while not send_manager._record_q.empty():
                    queued = send_manager._record_q.get(block=True)
                    send_manager.send(queued)

                if not record.control.req_resp:
                    continue
                result = send_manager._result_q.get(block=True)
                result_type = result.WhichOneof("result_type")
                if url_shown or result_type != "run_result":
                    continue

                # Announce the run URL once, on the first run result.
                run_info = result.run_result.run
                # TODO(jhr): hardcode until we have settings in sync
                url = "{}/{}/{}/runs/{}".format(
                    self._app_url,
                    url_quote(run_info.entity),
                    url_quote(run_info.project),
                    url_quote(run_info.run_id),
                )
                print("Syncing: %s ..." % url, end="")
                sys.stdout.flush()
                url_shown = True

            send_manager.finish()
            # Only mark synced if the run actually finished
            if self._mark_synced and not self._view and finished:
                marker_path = "{}{}".format(sync_item, SYNCED_SUFFIX)
                with open(marker_path, "w"):
                    pass
            print("done.")
Exemple #2
0
    def run(self):
        """Sync each entry of ``self._sync_list``.

        An entry may be a directory or a file path.  A directory is skipped
        when ``check_and_warn_old`` flags it or when it does not contain
        exactly one file ending with ``WANDB_SUFFIX``; otherwise that file
        becomes the item to sync.

        Records are read from the file and passed to a ``SendManager``.  In
        view mode (``self._view``) records are only printed -- in full when
        ``self._verbose`` is set, otherwise just their type -- and nothing is
        sent.  After a real sync, an empty marker file
        (``sync_item + SYNCED_SUFFIX``) is created if ``self._mark_synced``
        is set.
        """
        for sync_item in self._sync_list:
            if os.path.isdir(sync_item):
                files = os.listdir(sync_item)
                filtered_files = list(
                    filter(lambda f: f.endswith(WANDB_SUFFIX), files))
                if check_and_warn_old(files) or len(filtered_files) != 1:
                    print("Skipping directory: {}".format(sync_item))
                    continue
                sync_item = os.path.join(sync_item, filtered_files[0])
            dirname = os.path.dirname(sync_item)
            files_dir = os.path.join(dirname, "files")
            # Static settings for the sender; only files_dir depends on the
            # item, every other field is a fixed placeholder.
            sd = dict(
                files_dir=files_dir,
                _start_time=0,
                git_remote=None,
                resume=None,
                program=None,
                ignore_globs=(),
                run_id=None,
                entity=None,
                project=None,
                run_group=None,
                job_type=None,
                run_tags=None,
                run_name=None,
                run_notes=None,
                save_code=None,
            )
            settings = settings_static.SettingsStatic(sd)
            record_q = queue.Queue()
            result_q = queue.Queue()
            publish_interface = interface.BackendSender(record_q=record_q)
            sm = sender.SendManager(
                settings=settings,
                record_q=record_q,
                result_q=result_q,
                interface=publish_interface,
            )
            ds = datastore.DataStore()
            ds.open_for_scan(sync_item)

            # save exit for final send
            exit_pb = None
            shown = False

            while True:
                data = ds.scan_data()
                if data is None:
                    break
                pb = wandb_internal_pb2.Record()
                pb.ParseFromString(data)
                record_type = pb.WhichOneof("record_type")
                if self._view:
                    # View mode: print the record and never send it.
                    if self._verbose:
                        print("Record:", pb)
                    else:
                        print("Record:", record_type)
                    continue
                if record_type == "run":
                    # Apply overrides supplied on this object, then request
                    # a response so the run URL can be printed below.
                    if self._run_id:
                        pb.run.run_id = self._run_id
                    if self._project:
                        pb.run.project = self._project
                    if self._entity:
                        pb.run.entity = self._entity
                    pb.control.req_resp = True
                elif record_type == "exit":
                    # Hold the exit record back until "final" is seen.
                    exit_pb = pb
                    continue
                elif record_type == "final":
                    assert exit_pb, "final seen without exit"
                    # Send the deferred exit record in place of "final".
                    pb = exit_pb
                    exit_pb = None
                sm.send(pb)
                # send any records that were added in previous send
                while not record_q.empty():
                    data = record_q.get(block=True)
                    sm.send(data)

                if pb.control.req_resp:
                    result = result_q.get(block=True)
                    result_type = result.WhichOneof("result_type")
                    if not shown and result_type == "run_result":
                        r = result.run_result.run
                        # TODO(jhr): hardcode until we have settings in sync
                        url = "{}/{}/{}/runs/{}".format(
                            self._app_url,
                            url_quote(r.entity),
                            url_quote(r.project),
                            url_quote(r.run_id),
                        )
                        print("Syncing: %s ..." % url, end="")
                        sys.stdout.flush()
                        shown = True
            sm.finish()
            # View mode sends nothing, so it must not leave a marker claiming
            # the item was synced.
            if self._mark_synced and not self._view:
                synced_file = "{}{}".format(sync_item, SYNCED_SUFFIX)
                with open(synced_file, "w"):
                    pass
            print("done.")
Exemple #3
0
    def run(self):
        """Sync each entry of ``self._sync_list``.

        An entry may be a directory or a file path.  A directory is resolved
        to the file inside it whose name ends with ``WANDB_SUFFIX``; when no
        tensorboard root was found, the directory is skipped if
        ``check_and_warn_old`` flags it or if it does not contain exactly one
        such file.

        If a tensorboard root was found and the item is not a ``.wandb``
        file, the tfevent data is sent via ``_send_tensorboard`` and the
        entry is done.  Otherwise the records of the ``.wandb`` file are
        scanned and sent one by one, and an empty marker file
        (``sync_item + SYNCED_SUFFIX``) is created afterwards when
        ``self._mark_synced`` is set and view mode is off.
        """
        for sync_item in self._sync_list:
            tb_event_files, tb_logdirs, tb_root = self._find_tfevent_files(sync_item)
            if os.path.isdir(sync_item):
                files = os.listdir(sync_item)
                filtered_files = list(filter(lambda f: f.endswith(WANDB_SUFFIX), files))
                if tb_root is None and (
                    check_and_warn_old(files) or len(filtered_files) != 1
                ):
                    print("Skipping directory: {}".format(sync_item))
                    continue
                if len(filtered_files) > 0:
                    sync_item = os.path.join(sync_item, filtered_files[0])
            root_dir = os.path.dirname(sync_item)
            # If we're syncing tensorboard, let's use a tmpdir
            if tb_event_files > 0 and not sync_item.endswith(WANDB_SUFFIX):
                root_dir = TMPDIR.name
            sm = sender.SendManager.setup(root_dir)

            if tb_root is not None:
                if tb_event_files > 0 and sync_item.endswith(WANDB_SUFFIX):
                    # A .wandb file takes precedence over tfevent files.
                    wandb.termwarn(
                        "Found .wandb file, not streaming tensorboard metrics."
                    )
                else:
                    print(
                        "Found {} tfevent files in {}".format(tb_event_files, tb_root)
                    )
                    if len(tb_logdirs) > 3:
                        # Fill in the directory count; the message used to be
                        # emitted with a literal "{}" placeholder.
                        wandb.termwarn(
                            "Found {} directories containing tfevent files. "
                            "If these represent multiple experiments, sync them "
                            "individually or pass a list of paths.".format(
                                len(tb_logdirs)
                            )
                        )
                    self._send_tensorboard(tb_root, tb_logdirs, sm)
                    continue
            ds = datastore.DataStore()
            ds.open_for_scan(sync_item)

            # save exit for final send
            exit_pb = None
            shown = False

            while True:
                data = ds.scan_data()
                if data is None:
                    break
                # _parse_pb returns the record, the updated exit record and
                # a flag telling whether to skip sending this record.
                pb, exit_pb, cont = self._parse_pb(data, exit_pb)
                if cont:
                    continue
                sm.send(pb)
                # send any records that were added in previous send
                while not sm._record_q.empty():
                    data = sm._record_q.get(block=True)
                    sm.send(data)

                if pb.control.req_resp:
                    result = sm._result_q.get(block=True)
                    result_type = result.WhichOneof("result_type")
                    # Print the run URL once, on the first run result.
                    if not shown and result_type == "run_result":
                        r = result.run_result.run
                        # TODO(jhr): hardcode until we have settings in sync
                        url = "{}/{}/{}/runs/{}".format(
                            self._app_url,
                            url_quote(r.entity),
                            url_quote(r.project),
                            url_quote(r.run_id),
                        )
                        print("Syncing: %s ..." % url, end="")
                        sys.stdout.flush()
                        shown = True
            sm.finish()
            # View mode must not leave a "synced" marker behind.
            if self._mark_synced and not self._view:
                synced_file = "{}{}".format(sync_item, SYNCED_SUFFIX)
                with open(synced_file, "w"):
                    pass
            print("done.")