Example #1
def compute_target(answers_dset, ans2label, name, cache_root='data/cache'):
    """Augment answers_dset with soft score as label

    ***answers_dset should be preprocessed***

    Write result into a cache file
    """
    target = []
    for ans_entry in answers_dset:
        answers = ans_entry['answers']
        answer_count = {}
        for answer in answers:
            answer_ = answer['answer']
            answer_count[answer_] = answer_count.get(answer_, 0) + 1

        labels = []
        scores = []
        for answer in answer_count:
            if answer not in ans2label:
                continue
            labels.append(ans2label[answer])
            score = get_score(answer_count[answer])
            scores.append(score)

        target.append({
            'question_id': ans_entry['question_id'],
            'image_id': ans_entry['image_id'],
            'labels': labels,
            'scores': scores
        })

    utils.create_dir(cache_root)
    cache_file = os.path.join(cache_root, name + '_target.pkl')
    cPickle.dump(target, open(cache_file, 'wb'))
    return target
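Note: compute_target relies on a get_score helper that is not shown in this example. In VQA-style preprocessing it is commonly the soft-score rule sketched below; treat the exact thresholds as an assumption, since the original helper is not included here.

def get_score(count):
    # Assumed VQA-style soft score: answers given by more annotators get a
    # higher score, capped at 1.0.
    if count == 0:
        return 0.0
    elif count == 1:
        return 0.3
    elif count == 2:
        return 0.6
    elif count == 3:
        return 0.9
    return 1.0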
Example #2
def prepare_output_path(output_dir):
    """ Creates a path for the certifications exports directory """
    if not output_dir:
        output_dir = 'exports'
    output_path = os.path.join(output_dir, 'certifications')
    utils.create_dir(output_path)
    return output_path
Example #3
    def _read(self):
        """
            Read the json file and parse it.
        """
        from src.app import App
        do_later = ["app_path", "icons_path", "icons"]
        try:
            with open(self._db_file, 'r') as db_obj:
                data = json.load(db_obj)
                for key, value in data.items():
                    if key not in do_later:
                        setattr(self, key, value)
        except (FileNotFoundError, ValueError, KeyError):
            Logger.error("Application file is broken: {}".format(
                self._db_file))
            return

        self._parse_paths(data["app_path"], "app_path")
        self._parse_paths(data["icons_path"], "icons_path")
        self._parse_icons(data["icons"])

        if len(App.get("only")) == 1 and App.path():
            self.app_path.append(App.path())

        found = self.icons and self.app_path
        if self.force_create_folder and found:
            for icon_path in self.icons_path:
                create_dir(str(icon_path))
            self.dont_install = False
        else:
            self.dont_install = not (found and self.icons_path)

        # NWJS special case
        if self.get_type() == "nwjs" and not self.dont_install:
            self.dont_install = not App.get("nwjs")
Example #4
def prepare_output_path(output_path):
    """ Set output_path and create a content dir if needed """
    if not output_path:
        output_path = 'exports/gitbook'
    content_path = os.path.join(output_path, 'content')
    utils.create_dir(content_path)
    return output_path
Example #5
def reproduce_taxonomic_classifier_testset(
        pretrained_models: PretrainedModels,
        batch_size: int = 64,
        output_dir: str = "./results/taxonomic") -> None:
    print("Test Taxonomic-Classifier...")
    create_dir(output_dir)

    # Get Taxonomic testset
    dataset = SequenceReadingsDataset(test_type="taxonomic")
    dataloader = DataLoader(dataset,
                            num_workers=4,
                            shuffle=False,
                            batch_size=batch_size)

    # Create Result DataFrame for Taxonomic Classification
    results = pd.DataFrame(
        data={
            "seq_id": dataset.sequence_id,
            "aa": dataset.aa_sequence,
            "stop_codons": dataset.contains_stop,
            "species": dataset.label_species
        })

    # Taxonomic Testset Classification
    logits, results["tax_pred"] = predict(pretrained_models.taxonomic,
                                          dataloader,
                                          pretrained_models.tokenizer,
                                          pretrained_models.device)
    np.save(os.path.join(output_dir, "logits"), logits)

    # Confusion Matrix
    plot_confusion_heatmap(results["species"],
                           results["tax_pred"],
                           os.path.join(output_dir,
                                        "taxonomic_conf-matrix.png"),
                           ["Actual Class"], ["Prediction"],
                           normalize=True)

    # ROC
    plot_roc(logits,
             results["species"].to_numpy(),
             save_fig=os.path.join(output_dir, "taxonomic_ROC.png"))

    # Accuracy
    confusion_matrix = pd.crosstab(results["species"],
                                   results["tax_pred"],
                                   normalize=True).to_numpy()
    accuracy_to_stdout(confusion_matrix, {
        "1": "Bacteria",
        "0": "Virus",
        "2": "Human"
    })

    # Save results and checkout
    results.to_hdf(os.path.join(output_dir, "result_dataframe.h5"),
                   key='classification_results',
                   mode='w',
                   format='table')
    print("... finished. Results are saved in {}\n".format(output_dir))
Example #6
    def test_dir_created(self):
        '''test that a tmp dir is created'''

        utils.create_dir('tmp')

        assert os.path.exists('tmp')

        shutil.rmtree('tmp')
Example #7
def export_local_files(item_path, io_paths):
    """ Export local files to the new directory """
    if io_paths:
        output_path = os.path.join(io_paths["output"], "artifacts", item_path)
        input_path = os.path.join(io_paths["input"], item_path)
        utils.create_dir(os.path.dirname(output_path))
        if not os.path.exists(output_path) or not filecmp.cmp(input_path, output_path):
            shutil.copy(input_path, output_path)
Example #8
def inventory(certification, exports_dir, output_dir):
    """ Creates an inventory for a specific certification  """
    certs_dir = os.path.join(exports_dir, 'certifications')
    utils.create_dir(output_dir)
    cert_path = verify_certification_path(certification, certs_dir)
    if cert_path:
        output_path = inventory_builder.create_inventory(cert_path, output_dir)
        click.echo('Inventory yaml created at `{0}`'.format(output_path))
Example #9
def certs(certification, data_dir, output_dir):
    """ Create certification yamls """
    utils.create_dir(output_dir)
    certs_dir = os.path.join(data_dir, 'certifications')
    if verify_certification_path(certification, certs_dir):
        output_path = yamls_to_certification.create_yaml_certifications(
            certification, data_dir, output_dir
        )
        click.echo('Certification created in: `{0}`'.format(output_path))
Example #10
    def _download_testsets(self):
        """ Downloads the pretrained classification models for a certain model type.
        """
        download_urls = {
            "frame":
            "https://zenodo.org/record/4306248/files/refseq.tar.gz",
            "taxonomic":
            "https://zenodo.org/record/4306240/files/uniprot.tar.gz",
            "SRR":
            "https://redmine.f4.htw-berlin.de/owncloud/index.php/s/NoXtz6ezSZHPB6T/download",
            "inORF":
            "https://redmine.f4.htw-berlin.de/owncloud/index.php/s/REkM3Zi5K8n9QW2/download"
        }

        def reporthook(count: int, block_size: int, total_size: int) -> None:
            global start_time
            if count == 0:
                start_time = time.time()
                return
            duration = time.time() - start_time
            progress_size = int(count * block_size)
            speed = int(progress_size / (1024 * duration))
            percent = min(int(count * block_size * 100 / total_size), 100)
            sys.stdout.write("\r %d%% | %d MB | %d KB/s" %
                             (percent, progress_size / (1024 * 1024), speed))
            sys.stdout.flush()

        for key in download_urls:
            if key == "frame" and os.path.isfile(
                    "./data/refseq/refseq_ds_all_off-frames_fb_DNA_test.fasta"
            ):
                continue
            if key == "taxonomic" and os.path.isfile(
                    "./data/uniprot/uniprot_swiss-prot_vbh_p100d_w_test.fasta"
            ):
                continue
            if key == "SRR" and os.path.isfile(
                    "./data/srr/SRR2940986_filtered.fasta"):
                continue
            if key == "inORF" and os.path.isfile(
                    "./data/inORF/inORF_unique.fasta"):
                continue
            # Download
            dir_path = "./data"
            create_dir(dir_path)
            file_name = download_urls[key].split("/")[-1]
            file_path = os.path.join(dir_path, file_name)
            urlretrieve(download_urls[key],
                        filename=file_path,
                        reporthook=reporthook)
            print(" - {} successfully downloaded".format(file_name))
            # Unzip
            shutil.unpack_archive(file_path,
                                  extract_dir=dir_path,
                                  format="gztar")
            # Remove downloaded archive
            os.remove(file_path)
Example #11
def get_file_path(output_dir, system_key, component_key=None):
    """ Creates the directory that will contain the component if it doesn't
    exist and returns the file path of the component yaml """
    filepath = os.path.join(output_dir, system_key)
    filename = 'system.yaml'
    if component_key:
        filepath = os.path.join(filepath, component_key)
        filename = 'component.yaml'
    utils.create_dir(filepath)
    return os.path.join(filepath, filename)
Example #12
def init_project(output_dir):
    """ Create a new control masonry project template """
    if not output_dir:
        output_dir = 'data'
    output_container, _ = os.path.split(output_dir)
    utils.create_dir(output_container)
    template_dir = get_template_dir()
    copy_to_path = os.path.join(os.getcwd(), output_dir)
    shutil.copytree(template_dir, copy_to_path)
    return output_dir
Example #13
    def generate_data_I(self, ds, save_to_dir=None, prefix='test'):
        """
        Generates augmented images using the imgaug library.
        The results are persisted on disk.
        """
        create_dir(save_to_dir)
        for imgs_batch, _ in ds.as_numpy_iterator():
            batches = UnnormalizedBatch(images=(imgs_batch * 255).astype(np.uint8))
            images_aug = [next(seq.augment_batches(batches, background=True)).images_aug for i in range(5)]
            [imageio.imwrite("%s/%s_%d_%d.png" % (str(save_to_dir), prefix, i, random.randint(0, 1000)), ia_j)
             for i, images in enumerate(images_aug) for ia_j in images]
Example #14
def prepare_locally_stored_files(element, io_paths):
    """ Prepare the files by moving locally stored files to the `artifacts` directory
    and linking filepaths to that directory """
    item_path = element['url']
    if not ('http://' in item_path or 'https://' in item_path):
        element['url'] = os.path.join('/artifacts', item_path).replace('\\', '/')
        if io_paths:
            output_path = os.path.join(io_paths['output'], 'artifacts', item_path)
            input_path = os.path.join(io_paths['input'], item_path)
            utils.create_dir(os.path.dirname(output_path))
            if not os.path.exists(output_path) or not filecmp.cmp(input_path, output_path):
                shutil.copy(input_path, output_path)
Example #15
    def _download_pretrained_model(self) -> None:
        """ Downloads the pretrained classification models for a certain model type.
        """
        download_urls = {
            "ProtBert": {
                "source": [
                    "https://s3.amazonaws.com/models.huggingface.co/bert/Rostlab/prot_bert/config.json",
                    "https://cdn.huggingface.co/Rostlab/prot_bert/pytorch_model.bin",
                    "https://cdn.huggingface.co/Rostlab/prot_bert/vocab.txt"
                ],
                "frame": [
                    "https://zenodo.org/record/4306420/files/metadata.json",
                    "https://zenodo.org/record/4306420/files/state_dict.pth"
                ],
                "taxonomic": [
                    "https://zenodo.org/record/4306499/files/metadata.json",
                    "https://zenodo.org/record/4306499/files/state_dict.pth"
                ]
            }
        }

        def reporthook(count: int, block_size: int, total_size: int) -> None:
            global start_time
            if count == 0:
                start_time = time.time()
                return
            duration = time.time() - start_time
            progress_size = int(count * block_size)
            speed = int(progress_size / (1024 * duration))
            percent = min(int(count * block_size * 100 / total_size), 100)
            sys.stdout.write("\r %d%% | %d MB | %d KB/s" %
                             (percent, progress_size / (1024 * 1024), speed))
            sys.stdout.flush()

        if not os.path.isdir(self.path):
            url_dict = download_urls[self.type]
            for key in url_dict:
                subfolder_path = os.path.join(self.path, key)
                create_dir(subfolder_path)
                if os.listdir(subfolder_path):
                    print("There are already files in {}.".format(
                        subfolder_path))
                else:
                    print("Downloading pre-trained models")
                    for url in url_dict[key]:
                        file_name = url.split("/")[-1]
                        urlretrieve(url,
                                    filename=os.path.join(
                                        subfolder_path, file_name),
                                    reporthook=reporthook)
                        print(
                            " - {} successfully downloaded".format(file_name))
Example #16
    def collect_references(self, references, output_base_path, relative_base_path):
        for reference in utils.inplace_gen(references):
            path = reference.get('path', 'NONE')
            file_import_path = os.path.join(self.component_directory, path)
            is_local = not ('http://' in file_import_path or 'https://' in file_import_path)
            if os.path.exists(file_import_path) and is_local:
                # Create dir and copy file
                file_output_path = os.path.join(output_base_path, path)
                utils.create_dir(os.path.dirname(file_output_path))
                shutil.copy(file_import_path, file_output_path)
                # Rename path
                file_relative_path = os.path.join(relative_base_path, path)
                reference['path'] = file_relative_path
Example #17
    def test_dir_replaced(self):
        '''test that a tmp dir is replaced if it exists'''

        os.mkdir('tmp')
        os.mkdir('tmp/misc')

        assert os.path.exists('tmp/misc')

        utils.create_dir('tmp')

        assert not os.path.exists('tmp/misc')

        shutil.rmtree('tmp')
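Examples #6 and #17 together pin down the behavior expected of utils.create_dir: it creates the directory when it is missing and replaces it when it already exists. A minimal sketch that satisfies both tests (an assumption, not the project's actual implementation):

import os
import shutil

def create_dir(path):
    # Wipe any existing directory so the caller always gets a fresh, empty one.
    if os.path.isdir(path):
        shutil.rmtree(path)
    os.makedirs(path)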
Example #18
    def generate_data_K(self, ds, save_to_dir=None, prefix='test', data_gen=None):
        """ Generates augmented images using Keras ImageDataGenerator class.
            The results are persisted in disc."""
        if save_to_dir is None:
            save_to_dir = self.processed_dir
        else:
            create_dir(save_to_dir)

        img_gen = ImageDataGenerator(**data_gen)
        # By default, create 5 new augmented pictures for each original image.
        for img, _ in ds.as_numpy_iterator():
            img_flow = img_gen.flow(img, batch_size=32, save_to_dir=str(save_to_dir), save_prefix=prefix)
            [next(img_flow)[0].astype(np.uint8) for i in range(5)]
Example #19
def concat_markdowns(markdown_path, output_path):
    """ Copy the markdown content files into the gitbook directory and return
    the contents of the markdown summary file as the base summary string, so it
    can be joined with the gitbook summary generated in this file. """
    for filename in glob.iglob(os.path.join(markdown_path, "*", "*")):
        # Get the output file path and create the directory before copying
        output_filepath = os.path.join(output_path, filename.replace(os.path.join(markdown_path, ""), ""))
        output_dir = os.path.dirname(output_filepath)
        utils.create_dir(output_dir)
        shutil.copy(filename, output_filepath)
    summary_path = os.path.join(markdown_path, "SUMMARY.md")
    with open(summary_path, "r") as summary_file:
        main_summary = summary_file.read()
    return main_summary
Example #20
    def export_references(self, references, export_dir):
        """ Given a list of references in either list or dict format,
        determine which references were saved locally and save those to
        the appropriate location in the export directory """
        if not export_dir:
            return references
        relative_base_path = os.path.join(self.system_key, self.component_key)
        output_base_path = os.path.join(export_dir, relative_base_path)
        utils.create_dir(output_base_path)
        self.collect_references(
            references=references,
            output_base_path=output_base_path,
            relative_base_path=relative_base_path
        )
        return references
Example #21
def docs(export_format, certification, exports_dir, data_dir, output_dir):
    """ Create certification documentation """
    certs_dir = os.path.join(exports_dir, 'certifications')
    cert_path = verify_certification_path(certification, certs_dir)
    markdown_dir = os.path.join(data_dir, 'markdowns')
    if cert_path:
        if export_format == 'gitbook':
            gitbook_output_dir = os.path.join(output_dir, 'gitbook')
            gitbook_markdown_dir = os.path.join(markdown_dir, 'gitbook')
            utils.create_dir(os.path.join(gitbook_output_dir, 'content'))
            output_path = certifications_to_gitbook.create_gitbook_documentation(
                cert_path, gitbook_output_dir, gitbook_markdown_dir
            )
            click.echo('Gitbook Files Created in `{0}`'.format(output_path))
        else:
            click.echo('{0} format is not supported yet...'.format(export_format))
Example #22
def main(input_img_path, ann_path, output_pred_path, model_path, is_batch,
         model_type):
    """
    Predict Images.
    """
    logger = logging.getLogger(__name__)
    # Create the directory if it doesn't exist; otherwise remove items inside it.
    create_dir(Path(output_pred_path))

    # collect images path
    input_img_dir = Path(input_img_path)
    imgs_path_test = sorted([
        i.absolute() for i in (input_img_dir / 'test').glob("*.png")
        if i.is_file()
    ])
    # Annotation path
    ann_path_dir = Path(ann_path)
    ann_test_path = ann_path_dir / 'test' / ann_file_name

    imgs_path_test = create_random_list_of_size(imgs_path_test,
                                                len(imgs_path_test) * 3)
    mask = Mask(output_pred_path)

    #    calculate_iou_metric(model, data_generator_test, mask)
    if model_type == 'unet':
        # Load model pre-trained
        data_generator_test = GDXrayDataGenerator(imgs_path_test,
                                                  ann_test_path,
                                                  labels,
                                                  n_classes,
                                                  batch_size=batch_size,
                                                  dim=dim)
        model = Unet(dim,
                     n_classes,
                     n_filters=n_filters,
                     pretrained_weights=model_path)
        predict_unet(model, data_generator_test, mask)
    elif model_type == 'contours':
        data_generator_test = GDXrayDataGenerator(imgs_path_test,
                                                  ann_test_path,
                                                  labels,
                                                  n_classes,
                                                  batch_size=batch_size,
                                                  dim=dim,
                                                  task='binary')
        predict_contours_batch(data_generator_test, mask)
Example #23
    def create_backup_dir(self):
        """Create a backup directory for an application (application_name)."""
        backup_dir = path.join(BACKUP_FOLDER, self.app.name,
                               strftime(BACKUP_FILE_FORMAT), "")
        exists = True
        new_backup_dir = backup_dir
        i = 1

        while exists:
            if path.exists(new_backup_dir):
                new_backup_dir = backup_dir + "_" + str(i)
            if not path.isdir(new_backup_dir):
                Logger.debug("Create new backup folder "
                             "for {}".format(self.app.name))
                create_dir(new_backup_dir)
                exists = False
            i += 1

        self._backup_dir = new_backup_dir
Example #24
def prepare_local_files(component_dict, ref_key, components_path, output_dir):
    """ Prepare references by saving locally referenced files to the
    certifications repository """
    relative_base_path = os.path.join(component_dict['system'], '')
    output_base_path = os.path.join(output_dir, relative_base_path)
    import_base_path = os.path.join(components_path, relative_base_path)
    utils.create_dir(output_base_path)
    for reference in inplace_gen(component_dict.get(ref_key)):
        path = reference.get('url', 'NONE')
        file_import_path = os.path.join(import_base_path, path)
        is_local = not ('http://' in file_import_path or 'https://' in file_import_path)
        if os.path.exists(file_import_path) and is_local:
            # Create dir and copy file
            file_output_path = os.path.join(output_base_path, path)
            utils.create_dir(os.path.dirname(file_output_path))
            shutil.copy(file_import_path, file_output_path)
            # Rename url
            file_relative_path = os.path.join(relative_base_path, path)
            reference['url'] = file_relative_path
    return component_dict.get(ref_key)
Example #25
def main():
    path = create_dir()
    url = "https://brasil.diplo.de/br-de/service/matriculaconsular/" \
          "2222228?fbclid=IwAR0MojqudTlgMKQjZG7nNqp9gr3-QSKyiiRdY0jeeBY336zm_3yqr_Oc_nc"

    content = scraping_de(url)

    for c in content:
        save_file(pathname=path,
                  filename=c["file_name"],
                  content=requests.get(c["xls_url"]).content)
Example #26
def generate_hed_dataset(input_flist, output_dir):
    if os.path.isfile(input_flist):
        flist = numpy.genfromtxt(input_flist, dtype=str, encoding='utf-8')

    create_dir(output_dir)

    for path in flist:
        path = os.path.normpath(path)
        outfile = os.path.join(output_dir, path.split("/")[-3], path.split("/")[-2], os.path.basename(path))
        Path(os.path.dirname(outfile)).mkdir(parents=True, exist_ok=True)
        # outfile=arguments_output_dir+path.split("/")[-3]+'/'+path.split("/")[-2]+'/'+os.path.basename(path)

        tensorInput = torch.FloatTensor(
            numpy.array(PIL.Image.open(path).convert('RGB'))[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) * (
                    1.0 / 255.0))

        tensorOutput = estimate(tensorInput)

        PIL.Image.fromarray(
            (tensorOutput.clamp(0.0, 1.0).numpy().transpose(1, 2, 0)[:, :, 0] * 255.0).astype(numpy.uint8)).save(
            outfile)
Example #27
def create_ans2label(occurence, name, cache_root='data/cache'):
    """Note that this will also create label2ans.pkl at the same time

    occurence: dict {answer -> whatever}
    name: prefix of the output file
    cache_root: str
    """
    ans2label = {}
    label2ans = []
    label = 0
    for answer in occurence:
        label2ans.append(answer)
        ans2label[answer] = label
        label += 1

    utils.create_dir(cache_root)

    cache_file = os.path.join(cache_root, name + '_ans2label.pkl')
    cPickle.dump(ans2label, open(cache_file, 'wb'))
    cache_file = os.path.join(cache_root, name + '_label2ans.pkl')
    cPickle.dump(label2ans, open(cache_file, 'wb'))
    return ans2label
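A typical way to chain Example #27 with Example #1 is to build the answer vocabulary first and then derive the soft-score targets from it. The snippet below is only an illustrative sketch; occurrence and train_answers stand in for a filtered answer-frequency dict and a preprocessed answers list, neither of which is defined in these examples.

# Hypothetical usage: vocabulary first, then soft-score targets.
ans2label = create_ans2label(occurrence, 'trainval')
targets = compute_target(train_answers, ans2label, 'train')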
Example #28
    def export_references(self, references, export_dir):
        """ Given a list of references in either list or dict format,
        determine which references were saved locally and save those to
        the appropriate location in the export directory """
        if not export_dir:
            return references
        relative_base_path = os.path.join(self.system_key, self.component_key)
        output_base_path = os.path.join(export_dir, relative_base_path)
        utils.create_dir(output_base_path)
        for reference in utils.inplace_gen(references):
            path = reference.get('path', 'NONE')
            file_import_path = os.path.join(self.component_directory, path)
            is_local = not ('http://' in file_import_path or 'https://' in file_import_path)
            if os.path.exists(file_import_path) and is_local:
                # Create dir and copy file
                file_output_path = os.path.join(output_base_path, path)
                utils.create_dir(os.path.dirname(file_output_path))
                shutil.copy(file_import_path, file_output_path)
                # Rename path
                file_relative_path = os.path.join(relative_base_path, path)
                reference['path'] = file_relative_path
        return references
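For context, export_references expects each reference to be a dict with a 'path' key; files found locally under component_directory are copied to export_dir/<system_key>/<component_key>/ and their 'path' values are rewritten to that relative location. A hedged usage sketch, where component stands for an instance of the (unshown) class these methods belong to:

references = [
    {'name': 'Network diagram', 'path': 'diagram.png'},           # local file: copied and path rewritten
    {'name': 'External doc', 'path': 'https://example.com/doc'},  # URL: left untouched
]
references = component.export_references(references, export_dir='exports')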
Example #29
    def process(args, model, eval_loader):

        model_path = args.input + '/model_epoch%s.pth' % (args.epoch)
        print('loading %s' % model_path)
        print(torch.cuda.current_device())
        model_data = torch.load(model_path)

        # Commented out because multi-GPU / distributed training is not used
        # model = nn.DataParallel(model).cuda()
        model = model.to(args.device)
        model.load_state_dict(model_data.get('model_state', model_data))

        model.train(False)

        logits, qIds = get_logits(args, model, eval_loader, args.device)
        results = make_json(logits, qIds, eval_loader)
        # results = make_json_with_logits(logits, qIds)
        model_label = '%s%s%d_%s' % (args.model, args.op, args.num_hid,
                                     args.label)
        if args.logits:
            utils.create_dir('logits/' + model_label)
            torch.save(logits,
                       'logits/' + model_label + '/logits%d.pth' % args.index)

        utils.create_dir(args.output)
        model_label += 'epoch%s' % args.epoch
        # out_file = args.output + '/' + args.input.split('/')[-1] + '.json'
        # with open(args.output + '/%s_%s.pkl' % (args.split, model_label), 'wb') as f:
        #     pickle.dump(results, f, protocol=2)
        with open(args.output + '/%s_%s.json' % (args.split, model_label),
                  'w') as f:
            json.dump(results, f)
        if args.model == 'cti':
            results = make_json_with_logits(logits, qIds)
            with open('results/%s_%s_logits.pkl' % (args.model, args.split),
                      'wb') as f:
                pickle.dump(results, f)
Example #30
def randomTune(config):
    # 'LR': 0.0001,                   # learning rate
    # 'D2G_LR': 0.1,                  # discriminator/generator learning rate ratio
    # 'BETA1': 0.0,                   # adam optimizer beta1
    # 'BETA2': 0.9,                   # adam optimizer beta2
    # 'L1_LOSS_WEIGHT': 1,            # l1 loss weight
    # 'FM_LOSS_WEIGHT': 10,           # feature-matching loss weight
    config.MAX_STEPS = 1500
    config.EVAL_INTERVAL = 80
    config.MAX_EPOCHES = 1
    # config.MAX_STEPS = 3
    experiments = 50
    for i in range(experiments):
        # sample from a Uniform distribution on a log-scale
        # config.LR = 10 ** np.random.uniform(-3, -5)  # Sample learning rate candidates in the range (0.001 to 0.00001)
        # config.D2G_LR = 10 ** np.random.uniform(-2,
        #                                         0)  # Sample regularization candidates in the range (0.01 to 0.0001)
        # config.LR = 0.0001
        # config.D2G_LR =0.1
        # # config.LR=0.0001

        # config.PATH = './checkpoints/places2_tune_%d_%f%f_' % (i, config.LR, config.D2G_LR)
        # logdir= config.PATH+('/log_%s_%s' % (config.LR , config.D2G_LR))
        create_dir(config.PATH)
        if TRAIN_LOSS:
            # if config.MODEL == 1:
            # config.L1_LOSS_WEIGHT = 10 ** np.random.uniform(-1,
            #                                                 1)  # Sample regularization candidates in the range (1 to 200)
            # config.FM_LOSS_WEIGHT = 10 ** np.random.uniform(-1,
            #                                                 1.5)  # Sample regularization candidates in the range (1 to 200)
            # config.ADV_LOSS_WEIGHT = 10 ** np.random.uniform(-1,
            #                                                  1)  # Sample regularization candidates in the range (1 to 200)
            if config.MODEL != 1:
                # config.L1_LOSS_WEIGHT = 10 ** np.random.uniform(-1,
                #                                                 1)  # Sample regularization candidates in the range (1 to 200)
                # config.FM_LOSS_WEIGHT = 10 ** np.random.uniform(-1,
                #                                                 1.5)  # Sample regularization candidates in the range (1 to 200)
                config.STYLE_LOSS_WEIGHT = np.random.uniform(
                    10, 400
                )  # Sample regularization candidates in the range (1 to 200)
                # config.CONTENT_LOSS_WEIGHT = 2 * 10 ** np.random.uniform(0,
                #                                                          2)  # Sample regularization candidates in the range (1 to 200)
                # config.INPAINT_ADV_LOSS_WEIGHT = 10 ** np.random.uniform(-1,
                #                                                  1)  # Sample regularization candidates in the range (1 to 200)
        model = EdgeConnect(config)
        model.load()
        # config.print()
        print('\nEx %d: learning_rate:%f  D_Learning_rate: %f:' %
              (i, config.LR, config.D2G_LR))
        if TRAIN_LOSS:
            if config.MODEL == 1:
                print('Ex %d: L1:%f  FM: %f  ADV: %f:' %
                      (i, config.L1_LOSS_WEIGHT, config.FM_LOSS_WEIGHT,
                       config.ADV_LOSS_WEIGHT))
            if config.MODEL != 1:
                print('Ex %d: L1:%f  FM: %f  STYLE: %f CONTENT: %f ADV: %f:' %
                      (i, config.L1_LOSS_WEIGHT, config.FM_LOSS_WEIGHT,
                       config.STYLE_LOSS_WEIGHT, config.CONTENT_LOSS_WEIGHT,
                       config.INPAINT_ADV_LOSS_WEIGHT))

        model.train()

    os._exit(0)
Example #31
    # v7w
    parser.add_argument('--use_feature', default='bottom', type=str, help='use bottom-up feature or grid feature')

    # SAN
    parser.add_argument('--num_stacks', default=2, type=int,
                        help='num of stacks in Stack Attention Networks')


    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    utils.create_dir(args.output)
    logger = utils.Logger(os.path.join(args.output, 'log.txt'))
    logger.write(args.__repr__())
    # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
    device = torch.device("cuda:" + str(args.gpu) if args.gpu >= 0 else "cpu")
    args.device = device

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True
    torch.cuda.set_device(args.gpu)

    dictionary = Dictionary.load_from_file('data_v7w/dictionary.pkl')
    train_dset = V7WDataset('train', args, dictionary, adaptive=True, max_boxes=args.max_boxes,
                            question_len=args.question_len)
    val_dset = V7WDataset('val', args, dictionary, adaptive=True, max_boxes=args.max_boxes,
Example #32
def reproduce_frame(pretrained_models: PretrainedModels,
                    batch_size: int = 64,
                    output_dir: str = "./results/frame"):
    print("Test Frame-Classifier...")
    create_dir(output_dir)

    model = pretrained_models.frame

    # Frame Dataset/Dataloader
    dataset = SequenceReadingsDataset(test_type="frame")
    dataloader = DataLoader(dataset,
                            num_workers=4,
                            shuffle=False,
                            batch_size=batch_size,
                            collate_fn=collate_tensors)

    # Create result DataFrame
    results = pd.DataFrame(
        data={
            "seq_id": dataset.sequence_id,
            "dna": dataset.dna_sequence,
            "aa": dataset.aa_sequence,
            "stop_codons": dataset.contains_stop,
            "frame": dataset.label_frame
        })

    # Frame classification
    logits, results["frame_pred"] = predict(model, dataloader,
                                            pretrained_models.tokenizer,
                                            pretrained_models.device)
    np.save(os.path.join(output_dir, "frame_logits"), logits)

    # Confusion matrix
    plot_confusion_heatmap(results["frame"],
                           results["frame_pred"],
                           os.path.join(output_dir, "frame_conf-matrix.png"),
                           ["Actual Frame"], ["Frame Prediction"],
                           normalize=True)

    # ROC
    plot_roc(logits,
             results["frame"].to_numpy(),
             save_fig=os.path.join(output_dir, "frame_ROC.png"))

    # Accuracy
    confusion_matrix = pd.crosstab(results["frame"],
                                   results["frame_pred"],
                                   normalize=True).to_numpy()
    frame_classes = {
        "0": "on-frame",
        "1": "offset by one base",
        "2": "offset by two bases",
        "3": "reverse-complementary",
        "4": "reverse-complementary and offset by one base",
        "5": "reverse complementary and offset by two bases"
    }
    accuracy_to_stdout(confusion_matrix, frame_classes)

    # Correct frames
    results["aa_shifted"] = frame_correction(results["dna"],
                                             results["frame_pred"])
    dataloader = DataLoader(Frame_Dataset(results["aa_shifted"]),
                            num_workers=4,
                            shuffle=False,
                            batch_size=batch_size)

    # Rerun frame classification
    _, results["aa_shifted_frame_pred"] = predict(model, dataloader,
                                                  pretrained_models.tokenizer,
                                                  pretrained_models.device)

    # Evaluate Frame Re-Classification
    plot_confusion_heatmap(results["frame"],
                           results["aa_shifted_frame_pred"],
                           os.path.join(output_dir,
                                        "shifted_frame_conf-matrix.png"),
                           ["Actual Frame"], ["Frame Prediction"],
                           normalize=True)

    # Save results and checkout
    results.to_hdf(os.path.join(output_dir, "results_dataframe.h5"),
                   key='classification_results',
                   mode='w',
                   format='table')
    print("... finished. Results are saved in {}\n".format(output_dir))
Example #33
def randomTune(config):
    # 'LR': 0.0001,                   # learning rate
    # 'D2G_LR': 0.1,                  # discriminator/generator learning rate ratio
    # 'BETA1': 0.0,                   # adam optimizer beta1
    # 'BETA2': 0.9,                   # adam optimizer beta2
    # 'L1_LOSS_WEIGHT': 1,            # l1 loss weight
    # 'FM_LOSS_WEIGHT': 10,           # feature-matching loss weight
    config.MAX_STEPS = 3200
    config.EVAL_INTERVAL = 80
    config.MAX_EPOCHES = 10
    # config.MAX_STEPS = 3
    config.BATCH_SIZE = 16
    experiments = 50
    for i in range(experiments):
        # sample from a Uniform distribution on a log-scale
        # config.LR = 10 ** np.random.uniform(-3, -5)  # Sample learning rate candidates in the range (0.001 to 0.00001)
        # config.D2G_LR = 10 ** np.random.uniform(-2,
        #                                         0)  # Sample regularization candidates in the range (0.01 to 0.0001)
        # config.LR = 0.0001
        # config.D2G_LR =0.1
        # # config.LR=0.0001

        # config.PATH = './checkpoints/tune_parameters/places2_tune_%d_%f%f_' % (i, config.LR, config.D2G_LR)
        # logdir= config.PATH+('/log_%s_%s' % (config.LR , config.D2G_LR))
        if TRAIN_LOSS:
            # if config.MODEL == 1:
            # config.L1_LOSS_WEIGHT = 10 ** np.random.uniform(-1,1)
            # config.FM_LOSS_WEIGHT = 10 ** np.random.uniform(-1,1.5)
            # config.ADV_LOSS_WEIGHT = 10 ** np.random.uniform(-1,1)
            # config.STYLE_LOSS_WEIGHT = np.random.uniform(0, 300)
            # config.CONTENT_LOSS_WEIGHT = 2 * 10 ** np.random.uniform(0, 2)
            # config.INPAINT_ADV_LOSS_WEIGHT = 10 ** np.random.uniform(-1, 1)
            # if config.MODEL != 1:
            #  Sample regularization candidates in the range (1 to 200)
            max_number = math.log(300, 10)
            config.L1_LOSS_WEIGHT = 10**np.random.uniform(-1, max_number)
            config.FM_LOSS_WEIGHT = 10**np.random.uniform(-1, max_number)
            # config.GRADIENT_LOSS_WEIGHT= 10 ** np.random.uniform(-1,max_number)
            config.STYLE_LOSS_WEIGHT = 10**np.random.uniform(-1, max_number)
            config.CONTENT_LOSS_WEIGHT = 10**np.random.uniform(-1, max_number)
            config.INPAINT_ADV_LOSS_WEIGHT = 10**np.random.uniform(
                -1, max_number)

        config.PATH = './checkpoints/tune_parameters/ex%d_L1_%f_ADV_%f_Style_%f_Perc_%f_Grad_%f_FM_%f' % (
            i, config.L1_LOSS_WEIGHT, config.INPAINT_ADV_LOSS_WEIGHT,
            config.STYLE_LOSS_WEIGHT, config.CONTENT_LOSS_WEIGHT,
            config.GRADIENT_LOSS_WEIGHT, config.FM_LOSS_WEIGHT)
        create_dir(config.PATH)
        model = CLFNet(config)
        model.load()
        # config.print()
        # print('\nEx %d: learning_rate:%f  D_Learning_rate: %f:' % (i, config.LR, config.D2G_LR))
        if TRAIN_LOSS:
            print(
                'Ex %d - L1:%f  FM: %f  STYLE: %f CONTENT: %f ADV: %f: GRAD: %f'
                %
                (i, config.L1_LOSS_WEIGHT, config.FM_LOSS_WEIGHT,
                 config.STYLE_LOSS_WEIGHT, config.CONTENT_LOSS_WEIGHT,
                 config.INPAINT_ADV_LOSS_WEIGHT, config.GRADIENT_LOSS_WEIGHT))
            # if config.MODEL == 1:
            #     print('Ex %d: L1:%f  FM: %f  ADV: %f:' % (
            #         i, config.L1_LOSS_WEIGHT, config.FM_LOSS_WEIGHT, config.ADV_LOSS_WEIGHT))
            # if config.MODEL != 1:
            #     print('Ex %d: L1:%f  FM: %f  STYLE: %f CONTENT: %f ADV: %f:' % (i, config.L1_LOSS_WEIGHT,
            #                                                                     config.FM_LOSS_WEIGHT,
            #                                                                     config.STYLE_LOSS_WEIGHT,
            #                                                                     config.CONTENT_LOSS_WEIGHT,
            #                                                                     config.INPAINT_ADV_LOSS_WEIGHT))
        model.train()
    os._exit(0)
Example #34
import numpy as np
from main import main
import multiprocessing
import os
from src.utils import create_config, create_dir, init_config
import yaml
from shutil import copyfile

debug = False
if __name__ == '__main__':
    # initial setup
    multiprocessing.set_start_method('spawn')
    checkpoints_path = './checkpoints/cell'  # model checkpoints path
    create_dir(checkpoints_path)
    config_path = os.path.join(checkpoints_path, 'config.yml')
    create_config(config_path)
    init_config(checkpoints_path, debug, EPOCH=50, INTERVAL=1000)

    # pre_train
    main(0, config_path)
    # train config
    create_config(config_path)
    init_config(checkpoints_path,
                debug,
                EPOCH=50,
                INTERVAL=1000,
                EVAL_INTERVAL_EPOCH=0.1)
    copyfile('checkpoints/cell/EdgeDetect_pre.pth',
             'checkpoints/cell/EdgeDetect.pth')
    # # train
    main(1, config_path)
Example #35
else:
    # split into sets based on folds
    val_df = df[df.fold == int(args.val_fold)]
    train_df = df[df.fold != int(args.val_fold)]
    args.run = f"{args.run}.fold_{args.val_fold:.0f}"

assert val_df.shape[0] + train_df.shape[0] == df.shape[0]
print(f"* Training set size: {train_df.shape[0]}")
print(f"* Validation set size: {val_df.shape[0]}")

# endregion

# region: prepare paths

td_dir = f"/app/.tensorboard/{args.run}"
create_dir(td_dir, remove=True)

checkpoint_path = f"{c['WORK_DIR']}/models/{args.run}"
create_dir(f"{c['WORK_DIR']}/models", remove=False)

# endregion

# region: problem type-dependent params

assert ds_meta["args"]["labels_mode"] in ["multilabel", "multiclass"]

if ds_meta["args"]["labels_mode"] == "multiclass":
    final_activation = "softmax"
    loss = "categorical_crossentropy"
elif ds_meta["args"]["labels_mode"] == "multilabel":
    final_activation = "sigmoid"
Example #36
def temp_dir():
    create_dir(TEST_TEMP_DIR)
Example #37
def prepare_output_path(output_path):
    """ Set output_path and create a content dir if needed """
    if not output_path:
        output_path = 'exports/inventory'
    utils.create_dir(output_path)
    return output_path
Example #38
def get_file_path(system, name, output_dir):
    """ Creates the directory that will contain the component if it doesn't
    exist and returns the file path of the component yaml """
    output_path = os.path.join(output_dir, system)
    utils.create_dir(output_path)
    return os.path.join(output_path, '{0}.yaml'.format(slugify(name)))
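A quick illustration of Example #38, assuming slugify behaves like the common python-slugify package (an assumption; the import is not shown here):

# Hypothetical usage: creates exports/AWS/ if needed and returns the component YAML path.
component_yaml = get_file_path('AWS', 'EC2 Instances', 'exports')
# component_yaml == 'exports/AWS/ec2-instances.yaml'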
Example #39
    # Parse configuration
    args = parse_args()

    # Set computational device
    if args.cpu is True:
        device = "cpu"
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Calculations will be executed on: ", device)

    # Create output folder. User defined name or subfolder in a result directory with input file name
    if args.output is None:
        output_path = os.path.join(
            "./results/",
            args.input.split(".fasta")[-2].split("/")[-1])
        create_dir(output_path)
        args.output = output_path
    else:
        create_dir(args.output)

    # Initialize pretrained models
    pretrained_models = PretrainedModels(args.pretrained_model, device=device)

    # Prepare data
    dataset = SequenceReadingsDataset(args.input)
    print("Dataset loaded ({} items) ".format(len(dataset)))
    dataloader = DataLoader(dataset,
                            num_workers=4,
                            shuffle=False,
                            batch_size=args.batch_size,
                            collate_fn=collate_tensors)
Example #40
def train(args,
          model,
          train_loader,
          eval_loader,
          num_epochs,
          output,
          opt=None,
          s_epoch=0):
    device = args.device
    lr_default = args.lr
    lr_decay_step = 2
    lr_decay_rate = .25
    lr_decay_epochs = range(
        10, 20, lr_decay_step) if eval_loader is not None else range(
            10, 20, lr_decay_step)
    gradual_warmup_steps = [
        0.5 * lr_default, 1.0 * lr_default, 1.5 * lr_default, 2.0 * lr_default
    ]
    saving_epoch = 0
    grad_clip = args.clip_norm
    utils.create_dir(output)
    optim = torch.optim.Adamax(filter(lambda p: p.requires_grad, model.parameters()), lr=lr_default) \
        if opt is None else opt

    # Initial loss function
    criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')

    logger = utils.Logger(os.path.join(output, 'log.txt'))
    logger.write(args.__repr__())
    best_eval_score = 0

    utils.print_model(model, logger)
    logger.write('optim: adamax lr=%.4f, decay_step=%d, decay_rate=%.2f, grad_clip=%.2f' % \
        (lr_default, lr_decay_step, lr_decay_rate, grad_clip))

    # Create trainer
    trainer = Trainer(args, model, criterion, optim)
    update_freq = int(args.update_freq)
    wall_time_start = time.time()
    for epoch in range(s_epoch, num_epochs):
        total_loss = 0
        train_score = 0
        total_norm = 0
        count_norm = 0
        num_updates = 0
        t = time.time()
        N = len(train_loader.dataset)
        num_batches = int(N / args.batch_size + 1)
        if epoch < len(gradual_warmup_steps):
            trainer.optimizer.param_groups[0]['lr'] = gradual_warmup_steps[
                epoch]
            logger.write('gradual warmup lr: %.8f' %
                         trainer.optimizer.param_groups[0]['lr'])
        elif epoch in lr_decay_epochs:
            trainer.optimizer.param_groups[0]['lr'] *= lr_decay_rate
            logger.write('decreased lr: %.8f' %
                         trainer.optimizer.param_groups[0]['lr'])
        else:
            logger.write('lr: %.8f' % trainer.optimizer.param_groups[0]['lr'])
        for i, (v, b, q, a, ans_mc, ans_gt) in enumerate(train_loader):
            v = v.to(device)
            b = b.to(device)
            q = q.to(device)
            a = a.to(device)
            ans_mc = ans_mc.to(device)

            # Clone each sample to 4 samples
            v = v.unsqueeze(1).expand(v.size(0), 4, v.size(1),
                                      v.size(2)).contiguous().view(
                                          v.size(0) * 4, v.size(1), v.size(2))
            q = q.unsqueeze(1).expand(q.size(0), 4,
                                      q.size(1)).contiguous().view(
                                          q.size(0) * 4, q.size(1))
            ans_mc = ans_mc.view(
                ans_mc.size(0) * ans_mc.size(1), ans_mc.size(2))
            a = a.view(ans_mc.size(0), 1)
            labels = torch.cat([a, 1 - a], 1)
            labels = labels.to(device)

            sample = [v, b, q, labels, ans_mc]
            if i < num_batches - 1 and (i + 1) % update_freq > 0:
                trainer.train_step(sample, update_params=False)
            else:
                loss, grad_norm, batch_score = trainer.train_step(
                    sample, update_params=True)
                total_norm += grad_norm
                count_norm += 1
                total_loss += loss.item()
                train_score += batch_score
                num_updates += 1
                if num_updates % int(args.print_interval / update_freq) == 0:
                    print(
                        "Iter: {}, Loss {:.4f}, Norm: {:.4f}, Total norm: {:.4f}, Num updates: {}, Wall time: {:.2f},"
                        " ETA: {}".format(i + 1,
                                          total_loss / ((num_updates + 1)),
                                          grad_norm, total_norm, num_updates,
                                          time.time() - wall_time_start,
                                          utils.time_since(t,
                                                           i / num_batches)))

        total_loss /= num_updates
        train_score = 100 * train_score / (num_updates * args.batch_size)
        if eval_loader is not None:
            print("Evaluating...")
            trainer.model.train(False)
            eval_score, bound = evaluate(model, eval_loader, args)
            trainer.model.train(True)

        logger.write('epoch %d, time: %.2f' % (epoch, time.time() - t))
        logger.write('\ttrain_loss: %.2f, norm: %.4f, score: %.2f' %
                     (total_loss, total_norm / count_norm, train_score))
        if eval_loader is not None:
            logger.write('\teval score: %.2f (%.2f)' %
                         (100 * eval_score, 100 * bound))

        # Save per epoch
        if epoch >= saving_epoch:
            model_path = os.path.join(output, 'model_epoch%d.pth' % epoch)
            utils.save_model(model_path, model, epoch, trainer.optimizer)
            # Save best epoch
            if eval_loader is not None and eval_score > best_eval_score:
                model_path = os.path.join(output, 'model_epoch_best.pth')
                utils.save_model(model_path, model, epoch, trainer.optimizer)
                best_eval_score = eval_score