Exemple #1
0
 def fromCmdArgs(cls, args: argparse.Namespace) -> TrainOptions:
     """
     Build the training options from parsed command line arguments.

     If ``args.f`` names a JSON file, the options are decoded from that file
     with jsonpickle and all other arguments are ignored. Otherwise a new
     instance is assembled from the individual command line flags.

     :param args: Namespace produced by the command line parser
     :return: The object decoded from the JSON file, or a new ``cls`` instance
     :raises ValueError: If ``args.f`` is set but is not an existing file
     """
     # Truthiness instead of `!= ""`: an unset (None) value is treated like
     # the empty string instead of being passed on as a path.
     if args.f:
         jsonFile = Path(makePathAbsolute(args.f))
         # is_file() is already False for paths that do not exist
         if not jsonFile.is_file():
             raise ValueError("Could not find JSON input file")
         with jsonFile.open() as f:
             content = f.read()
         # NOTE(security): jsonpickle.decode may instantiate arbitrary objects
         # named in the file - only feed it option files from trusted sources.
         return jsonpickle.decode(content)

     return cls(
         inputFile=args.i,
         outputDir=args.o,
         ecWeightsFile=args.a,
         type=args.t,
         fpType=args.k,
         fpSize=args.s,
         encFPSize=args.d,
         epochs=args.e,
         kFolds=args.K,
         testingFraction=args.l,
         enableMultiLabel=args.m,
         verbose=args.v,
         trainAC=args.trainAC,
         trainFNN=args.trainFNN,
         compressFeatures=args.c,
         sampleFractionOnes=args.sampleFractionOnes,
         sampleDown=args.sampleDown,
     )
Exemple #2
0
def train(opts: options.TrainOptions):
    """
    Execute the training pipeline configured by ``opts``.

    Imports the input data, creates the output directory, optionally
    compresses the fingerprints with an autoencoder and finally trains the
    single-label and/or multi-label models that are switched on.

    :param opts: Options defining the details of the training
    """
    data = fp.importDataFile(
        opts.inputFile,
        import_function=fp.importSmilesCSV,
        fp_size=opts.fpSize,
    )

    # The output directory has to be in place before any model is trained
    createDirectory(opts.outputDir)

    if opts.compressFeatures:
        if not opts.trainAC:
            # Reuse an existing autoencoder: rebuild the model, then load
            # the stored weights into its encoder part
            _, encoder = ac.define_ac_model(
                input_size=opts.fpSize,
                encoding_dim=opts.encFPSize,
            )
            encoder.load_weights(makePathAbsolute(opts.ecWeightsFile))
        else:
            # Fit a fresh autoencoder on the full feature matrix
            encoder = ac.train_full_ac(data, opts)

        # Replace the fingerprints by their compressed representation
        data = ac.compress_fingerprints(data, encoder)

    # Single-label models
    if opts.trainFNN:
        fNN.train_nn_models(df=data, opts=opts)

    # Multi-label models
    if opts.enableMultiLabel:
        fNN.train_nn_models_multi(df=data, opts=opts)
Exemple #3
0
def main():
    """
    Main function that runs the conversion/training/prediction selected by
    the command line arguments.

    For ``train`` and ``predict`` the input file and output directory are made
    absolute, the output directory is created, a log file is set up inside
    it, the resolved options are logged and the process exits with status 0
    once the run is finished. ``convert`` returns normally after converting
    the data files of the given directory.

    :raises ValueError: If the ``convert`` input is not a directory
    """
    # Local import keeps this function self-contained; sys.exit is used
    # instead of the interactive-only exit() helper injected by `site`.
    import sys

    parser = options.createCommandlineParser()
    prog_args: Namespace = parser.parse_args()

    try:
        if prog_args.method == "convert":
            directory = makePathAbsolute(prog_args.f)
            if not path.isdir(directory):
                raise ValueError("Input directory is not a directory")
            createLogger(path.join(directory, "convert.log"))
            logging.info(f"Convert all data files in {directory}")
            fp.convert_all(directory)
        elif prog_args.method == "train":
            train_opts = options.TrainOptions.fromCmdArgs(prog_args)
            fixed_opts = dataclasses.replace(
                train_opts,
                inputFile=makePathAbsolute(train_opts.inputFile),
                outputDir=makePathAbsolute(train_opts.outputDir))
            createDirectory(fixed_opts.outputDir)
            createLogger(path.join(fixed_opts.outputDir, "train.log"))
            logging.info(
                f"The following arguments are received or filled with default values:\n{fixed_opts}"
            )
            train(fixed_opts)
            sys.exit(0)
        elif prog_args.method == "predict":
            predict_opts = options.PredictOptions.fromCmdArgs(prog_args)
            fixed_opts = dataclasses.replace(
                predict_opts,
                inputFile=makePathAbsolute(predict_opts.inputFile),
                outputDir=makePathAbsolute(predict_opts.outputDir))
            createDirectory(fixed_opts.outputDir)
            createLogger(path.join(fixed_opts.outputDir, "predict.log"))
            # Log the resolved options (as the train branch does) rather than
            # the raw namespace, so values loaded from a JSON file show up too.
            logging.info(
                f"The following arguments are received or filled with default values:\n{fixed_opts}"
            )
            predict(fixed_opts)
            sys.exit(0)
    except AttributeError as e:
        # An attribute missing from the parsed arguments ends up here.
        # NOTE(review): this also catches AttributeErrors raised while
        # converting/training/predicting - consider narrowing the try block.
        print(e)
        parser.print_usage()
Exemple #4
0
 def fromCmdArgs(cls, args: argparse.Namespace) -> PredictOptions:
     """
     Build the prediction options from parsed command line arguments.

     If ``args.f`` names a JSON file, the options are decoded from that file
     with jsonpickle and all other arguments are ignored. Otherwise a new
     instance is assembled from the individual command line flags.

     :param args: Namespace produced by the command line parser
     :return: The object decoded from the JSON file, or a new ``cls`` instance
     :raises ValueError: If ``args.f`` is set but is not an existing file
     """
     # Truthiness instead of `!= ""`: an unset (None) value is treated like
     # the empty string instead of being passed on as a path.
     if args.f:
         jsonFile = Path(makePathAbsolute(args.f))
         # is_file() is already False for paths that do not exist
         if not jsonFile.is_file():
             raise ValueError("Could not find JSON input file")
         with jsonFile.open() as f:
             content = f.read()
         # NOTE(security): jsonpickle.decode may instantiate arbitrary objects
         # named in the file - only feed it option files from trusted sources.
         return jsonpickle.decode(content)

     return cls(
         inputFile=args.i,
         outputDir=args.o,
         ecWeightsFile=args.ECmodel,
         model=args.model,
         target=args.target,
         fpSize=args.s,
         encFPSize=args.d,
         type=args.t,
         fpType=args.k,
     )