Example #1
0
class GetCompoundsFromChEMBL(luigi.Task):
    """Retrieve compounds from ChEMBL and store them as an SDF file.

    Compounds are pre-filtered by:
     - logP (lower limit)
     - number of aromatic rings (upper limit)
     - chirality (exact value)
     - molecular weight (upper limit)
    """
    logP = luigi.FloatParameter(default=1.9)
    rings_number = luigi.IntParameter(default=3)
    chirality = luigi.IntParameter(default=(-1))
    mwt = luigi.FloatParameter(default=100.0)

    def requires(self):
        # No upstream dependencies.
        return []

    def run(self):
        # Ask the ChEMBL web client for molecules in SDF format.
        molecule = new_client.molecule
        molecule.set_format('sdf')

        # Apply the four pre-filters one after another.
        query = molecule.filter(molecule_properties__acd_logp__gte=self.logP)
        query = query.filter(
            molecule_properties__aromatic_rings__lte=self.rings_number)
        query = query.filter(chirality=self.chirality)
        query = query.filter(molecule_properties__full_mwt__lte=self.mwt)

        # SDF records are concatenated with the standard '$$$$' delimiter.
        with self.output().open('w') as sdf_file:
            for record in query:
                sdf_file.write(record)
                sdf_file.write('$$$$\n')

    def output(self):
        return luigi.LocalTarget('mols_2D.sdf')
Example #2
0
class Transmission(luigi.Task):
    """Compute the photon transmission fraction for one simulation run."""
    id = luigi.IntParameter()
    name = luigi.Parameter()
    wavelength = luigi.FloatParameter()  # nm
    theta = luigi.FloatParameter()  # deg

    def requires(self):
        return self.clone(Run)

    def run(self):
        # Open the ROOT file produced by the upstream Run task.
        root_file = TFile(self.input()[1].fn)
        gps = root_file.Get("generalParticleSourceMessenger")
        gps.GetEntry(0)
        hits = root_file.Get("hits")
        # Transmission = detected hits / generated particles, with a
        # Poissonian (sqrt(N)) uncertainty on the hit count.
        n_hits = hits.GetEntries()
        trans = u.ufloat(n_hits, np.sqrt(n_hits)) / gps.nParticles
        root_file.Close()
        # Persist as a one-row DataFrame for later merging.
        frame = pd.DataFrame({
            "wavelength": [self.wavelength],
            "theta": [self.theta],
            "t": [trans],
        })
        frame.to_pickle(self.output().fn)

    def output(self):
        return luigi.LocalTarget("./results/%s/wico-%d.pkl" %
                                 (self.name, self.id))
Example #3
0
class Run(luigi.contrib.external_program.ExternalProgramTask):
    """Run the simulation executable for one wavelength / geometry.

    Writes a GPS macro file on first invocation (first output target) and
    launches ./sim under `nice` with the macro and ROOT output paths.
    """
    id = luigi.IntParameter()
    name = luigi.Parameter()
    wavelength = luigi.FloatParameter()  # nm
    dz = luigi.FloatParameter()  # mm

    nice_level = luigi.IntParameter(5, significant=False)
    n_particles = luigi.IntParameter(3000, significant=False)
    ug11_filter_thickness = luigi.FloatParameter(1, significant=False)

    def program_args(self):
        # Create the macro file once if it does not exist yet.
        mac = self.output()[0]
        if not mac.exists():
            with mac.open("w") as o:
                # Photon energy E = h * c / lambda (lambda given in nm).
                e = scipy.constants.value("Planck constant in eV s") * scipy.constants.c / (self.wavelength * 1e-9)
                # BUG FIX: the original used Python-2-only `print >> o, ...`
                # statements; plain write() calls produce identical output
                # and also work under Python 3.
                lines = [
                    "/gps/energy/eMin %.18f eV" % e,
                    "/gps/energy/eMax %.18f eV" % e,
                    "/gps/nParticles %d" % self.n_particles,
                    "/gps/angle/thetaMin 0 deg",
                    "/gps/angle/thetaMax 0.75 deg",
                    "/gps/angle/phiMin 0 deg",
                    "/gps/angle/phiMax 360 deg",
                    "/g4sipm/digitize/hits 0",
                    "/run/beamOn 1",
                ]
                o.write("\n".join(lines) + "\n")
        output = self.output()[1]
        # ExternalProgramTask stringifies each argument before execution,
        # so numeric parameters may be passed as-is.
        return ["nice", "-n", self.nice_level,
                "./sim", "--mac", mac.fn, "--output", output.fn, "--dz", self.dz, "--ug11-filter-thickness", self.ug11_filter_thickness]

    def output(self):
        return [luigi.LocalTarget("./results/%s/famous-%d.mac" % (self.name, self.id)),
                luigi.LocalTarget("./results/%s/famous-%d.root" % (self.name, self.id))]
Example #4
0
class MLPClassifier(ClassifierWithTransferLearningKerasModelTraining):
    """Multi-layer perceptron binary classifier."""
    input_shape: Tuple[int, int] = luigi.TupleParameter(default=(100, ))
    batch_size: int = luigi.IntParameter(default=10)
    learning_rate = luigi.FloatParameter(default=1e-5)
    dense_layers: List[int] = luigi.ListParameter(default=[512, 512])
    dropout: float = luigi.FloatParameter(default=None)
    activation_function: str = luigi.ChoiceParameter(
        choices=KERAS_ACTIVATION_FUNCTIONS.keys(), default="relu")
    kernel_initializer: str = luigi.ChoiceParameter(
        choices=KERAS_WEIGHT_INIT.keys(), default="glorot_uniform")

    def create_base_model(self) -> Model:
        """Build the stacked-Dense MLP ending in a single sigmoid unit."""
        def hidden_layer(units):
            # All hidden layers share activation and initializer settings.
            return Dense(units,
                         activation=self.activation_function,
                         kernel_initializer=self.kernel_initializer)

        x_input = Input(shape=self.input_shape)
        net = hidden_layer(self.dense_layers[0])(x_input)

        # Optional dropout after every hidden layer but the first.
        for units in self.dense_layers[1:]:
            net = hidden_layer(units)(net)
            if self.dropout:
                net = Dropout(self.dropout)(net)

        probability = Dense(1, activation='sigmoid')(net)
        return Model(x_input, probability, name='BaseMLP')

    def create_model_with(self, base_model: Model) -> Model:
        # Transfer-learning hook: use the base model unchanged.
        return base_model
Example #5
0
class SplitDataset(luigi.Task):
    """Split the motions listed in the input file into train/test files."""
    test_size = luigi.FloatParameter(default=0.1)
    # BUG FIX: this was a FloatParameter, but sklearn's train_test_split
    # (via check_random_state) only accepts None, an int, or a RandomState
    # instance as the seed — a float (e.g. 12.0) raises ValueError at
    # runtime. An integer seed is what is meant here.
    random_state = luigi.IntParameter(default=12)

    def run(self):
        with self.input().open('r') as f:
            motions = f.readlines()

        train, test = train_test_split(motions,
                                       test_size=self.test_size,
                                       random_state=self.random_state)

        with self.output()['train'].open('w') as f:
            f.writelines(train)

        with self.output()['test'].open('w') as f:
            f.writelines(test)

    def output(self):
        # Target paths live under the configured global data folder.
        data_folder = luigi.configuration.get_config().get(
            'GlobalConfig', 'data_folder')
        return {
            'train': luigi.LocalTarget(os.path.join(data_folder, 'train.txt')),
            'test': luigi.LocalTarget(os.path.join(data_folder, 'test.txt'))
        }
Example #6
0
class GenerateImageByBounds(luigi.WrapperTask):
    """
    Schedule tile-generation tasks covering a geographic bounding box.
    """
    west = luigi.FloatParameter()
    north = luigi.FloatParameter()
    south = luigi.FloatParameter()
    east = luigi.FloatParameter()
    zoom = luigi.IntParameter()
    targetTask = luigi.TaskParameter(default=GenerateImageCSReliefMap)

    def requires(self):
        """
        Yield one targetTask per tile covering the bounds (plus a margin).
        """
        candidateTasks = [
            GenerateImageCSReliefMap, GenerateImageCurvature,
            GenerateImageSlope
        ]
        # BUG FIX: a bare `raise` with no active exception produces an
        # unhelpful RuntimeError; raise an explicit, descriptive error
        # instead. Also use the idiomatic `not in`.
        if self.targetTask not in candidateTasks:
            raise ValueError(
                "targetTask must be one of %s" % candidateTasks)

        edge_nw_x, edge_nw_y, _, _ = deg_to_num(self.north, self.west,
                                                self.zoom)
        edge_se_x, edge_se_y, _, _ = deg_to_num(self.south, self.east,
                                                self.zoom)
        # BUG FIX: Python-2 print statement replaced with a print() call
        # so the task also runs under Python 3.
        print(deg_to_num(self.north, self.west, self.zoom) + deg_to_num(
            self.south, self.east, self.zoom))
        # Extend the tile range by a 3-tile margin on every side.
        for tile_x in range(edge_nw_x - 3, edge_se_x + 3):
            for tile_y in range(edge_nw_y - 3, edge_se_y + 3):
                yield self.targetTask(x=tile_x, y=tile_y, z=self.zoom)
Example #7
0
class GenerateJijiDataSplits(gokart.TaskOnKart):
    """Split the merged Jiji corpus into train/dev/test JSON files."""
    task_namespace = "context_nmt"
    jiji_source_path = luigi.Parameter()
    target_path = luigi.Parameter()
    dev_proportion = luigi.FloatParameter()
    test_proportion = luigi.FloatParameter()
    quality_aware = luigi.BoolParameter()
    score_threhold = luigi.FloatParameter(default=0.3)

    def requires(self):
        return MergeJijiFiles(
            source_path=self.jiji_source_path,
            quality_aware=self.quality_aware,
            score_threhold=self.score_threhold,
        )

    def output(self):
        # Pass the upstream target through unchanged.
        return self.input()

    def run(self):
        documents = self.load()
        test_size = self.test_proportion
        # Rescale the dev fraction to what remains after the test split.
        dev_size = self.dev_proportion / (1 - test_size)

        train_part, test_part = train_test_split(list(documents.items()),
                                                 test_size=test_size)
        train, test = dict(train_part), dict(test_part)
        train_part, dev_part = train_test_split(list(train.items()),
                                                test_size=dev_size)
        train, dev = dict(train_part), dict(dev_part)

        if not os.path.isdir(self.target_path):
            os.mkdir(self.target_path)
        splits = {"train": train, "dev": dev, "test": test}
        for name, split in splits.items():
            with open(f"{self.target_path}/{name}.json", "w") as target:
                json.dump(split, target, ensure_ascii=False)
Example #8
0
class ExtractSegment(luigi.Task):
    """Cut one labelled time segment out of a downloaded audio track."""
    task_namespace = 'voxceleb'

    priority = 3

    person = luigi.Parameter()
    video = luigi.Parameter()
    segment = luigi.IntParameter()
    start = luigi.FloatParameter()
    stop = luigi.FloatParameter()

    def requires(self):
        return DownloadAudio(video=self.video)

    def output(self):
        filename = '{}_{:07d}.wav'.format(self.video, int(self.segment))
        return luigi.LocalTarget(
            data_out_path('segments', 'original', self.person, filename))

    def run(self):
        ffmpeg = FFmpeg(ffmpeg_bin=Config().ffmpeg_bin)
        # Write atomically so half-extracted files never count as complete.
        with AtomizedLocalTarget(self.output()) as target:
            ffmpeg.extract_segment(self.input().path,
                                   str(target.path),
                                   start=self.start,
                                   stop=self.stop,
                                   timeout=300)
        check_output(self.output().path)
Example #9
0
class GeneratePsudoData(gokart.TaskOnKart):
    """Build pseudo train/validation/test click splits plus item distances."""
    task_namespace = 'novelty_enhanced_bpr'

    test_size: float = luigi.FloatParameter(default=0.3)
    validation_size: float = luigi.FloatParameter(default=0.1)

    def requires(self):
        item_vectors = GenerateItemEmbedVectors()
        user_vectors = GenerateUserEmbedVectors()
        interactions = GenerateUserItemInteractions(
            item_embed_vector_task=item_vectors,
            user_embed_vector_task=user_vectors)
        distances = GetItemDistance(item_embed_vector_task=item_vectors)
        return {'item_distance': distances,
                'user_item_interaction': interactions}

    def run(self):
        clicks = self.load('user_item_interaction')
        item_distance = self.load('item_distance')

        clicks_train, clicks_test = train_test_split(clicks,
                                                     test_size=self.test_size)
        # Rescale so validation_size refers to the full dataset, not just
        # the remaining training portion.
        validation_fraction = self.validation_size / (1 - self.test_size)
        clicks_train, clicks_validation = train_test_split(
            clicks_train, test_size=validation_fraction)

        self.dump({'clicks_train': clicks_train,
                   'clicks_validation': clicks_validation,
                   'clicks_test': clicks_test,
                   'item_distance': item_distance})
class BICSegmentation(sciluigi.Task, AutoOutput):
    """Re-segment an initial segmentation with BIC-based segmentation."""

    in_segmentation = None
    in_features = None

    penalty_coef = luigi.FloatParameter(default=1.0)
    covariance_type = luigi.Parameter(default='full')
    min_duration = luigi.FloatParameter(default=1.0)
    precision = luigi.FloatParameter(default=0.1)

    def run(self):
        # Load precomputed features and the initial segmentation.
        with self.in_features().open('r') as fp:
            features = pickle.load(fp)
        with self.in_segmentation().open('r') as fp:
            segmentation = pyannote.core.json.load(fp)

        segmenter = pyannote.algorithms.segmentation.bic.BICSegmentation(
            penalty_coef=self.penalty_coef,
            covariance_type=self.covariance_type,
            min_duration=self.min_duration,
            precision=self.precision)
        timeline = segmenter.apply(features, segmentation=segmentation)

        # Label each resulting segment with its ordinal index.
        annotation = Annotation()
        for index, segment in enumerate(timeline):
            annotation[segment] = index

        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(annotation, fp)
Example #11
0
class ItemSimilaritySparkJob(luigi.Task):
    """
    Spark job for running item similarity model
    """
    inputPath = luigi.Parameter(default="/seldon-data/seldon-models/")
    outputPath = luigi.Parameter(default="/seldon-data/seldon-models/")
    client = luigi.Parameter(default="test")
    startDay = luigi.IntParameter(default=1)
    days = luigi.IntParameter(default=1)
    itemType = luigi.IntParameter(-1)
    limit = luigi.IntParameter(default=100)
    minItemsPerUser = luigi.IntParameter(default=0)
    minUsersPerItem = luigi.IntParameter(default=0)
    maxUsersPerItem = luigi.IntParameter(default=2000000)
    dimsumThreshold = luigi.FloatParameter(default=0.1)
    sample = luigi.FloatParameter(default=1.0)

    def output(self):
        path = "{}/{}/item-similarity/{}".format(
            self.outputPath, self.client, self.startDay)
        return luigi.LocalTarget(path)

    def run(self):
        # Register the model with seldon-cli ...
        add_params = [
            "seldon-cli", "model",
            "--action", "add",
            "--client-name", self.client,
            "--model-name", "similar-items",
            "--inputPath", self.inputPath,
            "--outputPath", self.outputPath,
            "--startDay", str(self.startDay),
            "--days", str(self.days),
            "--sample", str(self.sample),
            "--itemType", str(self.itemType),
            "--limit", str(self.limit),
            "--minItemsPerUser", str(self.minItemsPerUser),
            "--minUsersPerItem", str(self.minUsersPerItem),
            "--maxUsersPerItem", str(self.maxUsersPerItem),
            "--dimsumThreshold", str(self.dimsumThreshold),
        ]
        call(add_params)
        # ... then kick off training and return its exit status.
        train_params = ["seldon-cli", "model",
                        "--action", "train",
                        "--client-name", self.client,
                        "--model-name", "similar-items"]
        return call(train_params)
Example #12
0
class OvervoltageSimulation(SimulationMetaTask, luigi.Task):
    """Simulate SiPM response for randomly diced overvoltages.

    Fixes applied: Python-2-only print statements replaced with
    write()/print() calls, and sqlite connections are now closed.
    """
    name = luigi.Parameter("overvoltage-simulation")
    wavelength = luigi.FloatParameter(450)  # nm
    n_repititions = luigi.IntParameter(1000)
    breakdown_voltage = luigi.FloatParameter(53)  # Volt

    @property
    def run_kwargs(self):
        # Memoize: the randomly diced overvoltages must stay stable
        # across repeated property accesses.
        if hasattr(self, '_cached_run_kwargs'):
            return self._cached_run_kwargs
        # Dice overvoltages uniformly in [0, 10) Volt.
        overvoltages = np.random.uniform(0, 10, self.n_repititions)
        # Photon energy E = h * c / lambda (h in eV s, lambda in nm).
        e = 4.135667516e-15 * 299792458.0 / (self.wavelength * 1e-9)
        run_kwargs = dict(e_min=e, e_max=e, n_particles=10000, digitize_hits=False, persist_hits=True, persist_digis=False)
        self._cached_run_kwargs = [clone(run_kwargs, bias_voltage=(self.breakdown_voltage + ov)) for ov in overvoltages]
        return self._cached_run_kwargs

    def run(self):
        inputs = self.sqlite_from_runs()
        with self.output().open("w") as o:
            for input in inputs:
                con = sqlite3.connect(input.fn)
                try:
                    cur = con.cursor()
                    # TODO get angle.
                    temp, vb, vov, f_th = cur.execute("SELECT temperature, breakdownVoltage, overVoltage, thermalNoiseRate FROM sipmModel;").fetchone()
                    n = cur.execute("SELECT count() FROM `g4sipmHits-0`;").fetchone()[0]
                    # Space-separated row, matching the old `print >>` output.
                    o.write(" ".join(map(str, (temp, vb, vov, f_th, n))) + "\n")
                except Exception as e:
                    # Best-effort: report the broken run file and continue.
                    print("Failure in", input.fn)
                    print(e)
                finally:
                    # BUG FIX: connections were never closed (leaked handles).
                    con.close()
Example #13
0
class DynamicRangeSimulation(SimulationDynamicMetaTask, luigi.Task):
    """Dice photon counts log-uniformly and record effective cell counts.

    Fixes applied: `n_max` default is now an int (the IntParameter was
    given the float literal 1e7), Python-2-only print statements were
    replaced, and sqlite connections are closed after use.
    """
    name = luigi.Parameter("dynamic-range-simulation-schumacher")
    n_repititions = luigi.IntParameter(1000)
    step = luigi.IntParameter(1000)

    n_min = luigi.IntParameter(1)  # minimum number of photons
    n_max = luigi.IntParameter(10000000)  # maximum number of photons (was float 1e7)
    t_input = luigi.Parameter("../sample/resources/sawtooth-100ps.properties")
    e_min = luigi.FloatParameter(default=3.061338207066896, significant=False)  # eV (405 nm)
    e_max = luigi.FloatParameter(default=3.061338207066896, significant=False)  # eV (405 nm)

    def run_kwargs(self):
        kwargs = dict(exe="../fast/fast", persist_hits=False, noise_if_no_signal=True, t_input=self.t_input, bias_voltage=56.7)
        # Dice the number of particles log-uniformly in [n_min, n_max].
        n = np.array(np.ceil(np.exp(np.random.uniform(np.log(self.n_min), np.log(self.n_max), self.step))), dtype="int")
        return [clone(kwargs, n_particles=ni) for ni in n]

    def run_after_yield(self):
        # Collect results from all runs into one whitespace-separated table.
        inputs = self.sqlite_from_runs()
        with self.output().open("w") as o:
            for input in inputs:
                con = sqlite3.connect(input.fn)
                try:
                    cur = con.cursor()
                    n_particles, t_min, t_max = cur.execute("SELECT nParticles, tMin, tMax FROM particleSourceMessenger;").fetchone()
                    n_eff_cells = np.sum(cur.execute("SELECT weight FROM `g4sipmDigis-0` WHERE time >= %s AND time < %s;" % (t_min, t_max)).fetchall())
                    # Space-separated row, matching the old `print >>` output.
                    o.write("%s %s\n" % (n_particles, n_eff_cells))
                except Exception as e:
                    # Best-effort: report the broken run file and continue.
                    print("Failure in", input.fn)
                    print(e)
                finally:
                    # BUG FIX: close the connection to avoid leaking handles.
                    con.close()
Example #14
0
class Welch_t_test(luigi.Task):
    """Select k-mers whose frequencies differ significantly (Welch's t-test)."""
    multiple_comparison_method = luigi.Parameter(default="bonferroni")
    adjusted_pval = luigi.FloatParameter(default=0.001)
    threshold = luigi.FloatParameter(default=0.3)

    def requires(self):
        return Calculate_relative_frequency_profile()

    def output(self):
        misc_dir = os.path.join(out_dir, "miscellaneous")
        return luigi.LocalTarget(os.path.join(misc_dir, "welch_kmers.txt"))

    def run(self):
        # Load the positive and negative relative-frequency profiles,
        # indexed by sequence identifier.
        def load_profile(key):
            with self.input()[key].open('r') as fh:
                return pd.read_csv(fh).set_index('seq_ID')

        pos_rel_freq_df = load_profile('pos')
        neg_rel_freq_df = load_profile('neg')

        welch_kmers = feature_processing.Welch_t_test(
            pos_rel_freq_df,
            neg_rel_freq_df,
            self.multiple_comparison_method,
            self.adjusted_pval,
            self.threshold)

        # One selected k-mer per line.
        with self.output().open('w') as fh:
            fh.writelines(kmer + "\n" for kmer in welch_kmers)
Example #15
0
class WideRecommender(ClassifierWithTransferLearningKerasModelTraining):
    """Single-hidden-layer 'wide' recommender network."""
    input_shape: Tuple[int, int] = luigi.TupleParameter(default=(100, ))
    batch_size: int = luigi.IntParameter(default=10)
    learning_rate = luigi.FloatParameter(default=1e-5)
    dense_layers: List[int] = luigi.ListParameter(default=[512, 512])
    dropout: float = luigi.FloatParameter(default=None)
    activation_function: str = luigi.ChoiceParameter(
        choices=KERAS_ACTIVATION_FUNCTIONS.keys(), default="relu")
    kernel_initializer: str = luigi.ChoiceParameter(
        choices=KERAS_WEIGHT_INIT.keys(), default="glorot_uniform")

    def create_base_model(self) -> Model:
        """Build input -> one wide Dense layer -> sigmoid output."""
        inputs = Input(shape=self.input_shape, name='wide_inp')
        # One wide hidden layer sized to the input dimensionality.
        hidden = Dense(self.input_shape[0],
                       activation=self.activation_function,
                       kernel_initializer=self.kernel_initializer,
                       name='wide_mlp')(inputs)
        prediction = Dense(1,
                           activation='sigmoid',
                           kernel_initializer=self.kernel_initializer)(hidden)
        return Model(inputs, prediction, name='Wide')

    def create_model_with(self, base_model: Model) -> Model:
        # Transfer-learning hook: use the base model as-is.
        return base_model
Example #16
0
class DownloadBounds(luigi.WrapperTask):
    """
    Schedule one DownloadTile task per tile covering a bounding box.
    """
    baseUrl = luigi.Parameter()
    baseName = luigi.Parameter(default="output")
    west = luigi.FloatParameter()
    north = luigi.FloatParameter()
    south = luigi.FloatParameter()
    east = luigi.FloatParameter()
    zoom = luigi.IntParameter()

    def requires(self):
        """
        Yield download tasks for every tile between the NW and SE corners.
        """
        edge_nw_x, edge_nw_y, _, _ = deg_to_num(
            self.north, self.west, self.zoom)
        edge_se_x, edge_se_y, _, _ = deg_to_num(
            self.south, self.east, self.zoom)
        # BUG FIX: Python-2 print statements replaced with print() calls
        # so the task also runs under Python 3.
        print(deg_to_num(self.north, self.west, self.zoom) +
              deg_to_num(self.south, self.east, self.zoom))
        for tile_x in range(edge_nw_x, edge_se_x + 1):
            for tile_y in range(edge_nw_y, edge_se_y + 1):
                print("scheduling z:{} x:{} y:{}".format(self.zoom, tile_x, tile_y))
                yield DownloadTile(self.baseUrl, self.baseName, tile_x, tile_y, self.zoom)
class LinearBICClustering(sciluigi.Task, AutoOutput):
    """Cluster neighbouring segments with linear BIC clustering."""

    in_segmentation = None
    in_features = None

    max_gap = luigi.FloatParameter(default=3600.0)
    penalty_coef = luigi.FloatParameter(default=1.0)
    covariance_type = luigi.Parameter(default='diag')

    def run(self):
        # Load precomputed features and the starting segmentation.
        with self.in_features().open('r') as fp:
            features = pickle.load(fp)
        with self.in_segmentation().open('r') as fp:
            starting_point = pyannote.core.json.load(fp)

        clusterer = pyannote.algorithms.clustering.bic.LinearBICClustering(
            max_gap=self.max_gap,
            penalty_coef=self.penalty_coef,
            covariance_type=self.covariance_type)
        result = clusterer(starting_point, features=features)

        with self.out_put().open('w') as fp:
            pyannote.core.json.dump(result, fp)
class DatasetPairs(luigi.Task):
    """
    Outputs a list of training/testing dataset pairs.
    """

    d = luigi.Parameter()
    tr_bias = luigi.FloatParameter()
    te_bias = luigi.FloatParameter()
    foldidx = luigi.IntParameter()
    outdir = luigi.Parameter()
    tr_frac = luigi.FloatParameter(default=.5)
    random_seed = luigi.IntParameter(default=1234)
    size = luigi.IntParameter(default=1000)

    def requires(self):
        pass

    def run(self):
        # Read the original dataset.
        data = read_pickle(self.d)

        # Deterministic per-fold shuffling.
        np.random.seed(self.random_seed + self.foldidx)

        total = data.X.shape[0]
        indices = list(range(total))
        np.random.shuffle(indices)
        cut = int(total * self.tr_frac)
        tr_idx, te_idx = indices[:cut], indices[cut:]

        # Unbiased train/test datasets from the shuffled split.
        tr_data = Dataset(X=data.X[tr_idx], y=data.y[tr_idx], z=data.z[tr_idx])
        te_data = Dataset(X=data.X[te_idx], y=data.y[te_idx], z=data.z[te_idx])

        # Inject the requested confounding bias into each half.
        biased_pair = [
            tr_data.make_confounding_dataset(self.tr_bias, self.size),
            te_data.make_confounding_dataset(self.te_bias, self.size),
        ]
        for dataset in biased_pair:
            dataset.parent = None

        # Persist each dataset next to its output target.
        for dataset, target in zip(biased_pair, self.output()):
            with target.open("w") as fd:
                pickle.dump(dataset, fd)

    def output(self):
        fname = "trbias={:.3f}_tebias={:.3f}_size={}_foldidx={}_trfrac={:.3f}".format(
            self.tr_bias, self.te_bias, self.size, self.foldidx, self.tr_frac)
        fpath = os.path.join(self.outdir, "datapairs", fname)
        train_path = "{}_train.pkl".format(fpath)
        test_path = "{}_test.pkl".format(fpath)
        return [luigi.LocalTarget(p, format=luigi.format.Nop)
                for p in (train_path, test_path)]
Example #19
0
class AnalysisTask(luigi.Task, ABC):
    """
    Abstract class that requires the completion of dataset selection and model training.
    """

    imgfolder = luigi.Parameter()
    hdffolder = luigi.Parameter()
    modelsfolder = luigi.Parameter()
    target_size = luigi.IntParameter()  # standardizing to square images
    keep_categories = luigi.ListParameter()
    fractions = luigi.ListParameter()  # train/valid/test fraction
    model_definition = luigi.Parameter(
    )  # JSON file with model definition specs
    sigma = luigi.FloatParameter(default=0.5)
    threshold = luigi.BoolParameter(default=False)
    rest_as_other = luigi.BoolParameter(
        default=False
    )  # set the remaining as "other" - not recommended for small keep_category lengths
    whiten = luigi.BoolParameter(default=False)
    epsilon = luigi.FloatParameter(default=0.1)

    def requires(self):
        """
        Task depends on a trained model and a dataset archive.

        :return: Dictionary with TrainKerasModelFromDefinitionTask and SelectDatasetTask
        """
        # Positional argument tuples for the two upstream tasks (the model
        # task additionally takes modelsfolder and model_definition).
        model_args = (
            self.imgfolder,
            self.hdffolder,
            self.modelsfolder,
            self.target_size,
            self.keep_categories,
            self.fractions,
            self.model_definition,
            self.sigma,
            self.threshold,
            self.rest_as_other,
            self.whiten,
            self.epsilon,
        )
        dataset_args = (
            self.imgfolder,
            self.hdffolder,
            self.target_size,
            self.keep_categories,
            self.fractions,
            self.sigma,
            self.threshold,
            self.rest_as_other,
            self.whiten,
            self.epsilon,
        )
        return {
            "model": TrainKerasModelFromDefinitionTask(*model_args),
            "dataset": SelectDatasetTask(*dataset_args),
        }
Example #20
0
class HTCondorWorkflow(law.HTCondorWorkflow):
    """
    Custom htcondor workflow with good default configs for the CERN batch system.
    """

    poll_interval = luigi.FloatParameter(
        default=0.5,
        significant=False,
        description="time between "
        "status polls in minutes, default: 0.5")
    max_runtime = luigi.FloatParameter(default=24.0,
                                       significant=False,
                                       description="maximum "
                                       "runtime in hours")
    only_missing = luigi.BoolParameter(default=True,
                                       significant=False,
                                       description="skip tasks "
                                       "that are considered complete")
    cmst3 = luigi.BoolParameter(default=False,
                                significant=False,
                                description="use the CMS T3 "
                                "HTCondor quota for jobs, default: False")

    def htcondor_output_directory(self):
        # Job outputs live under the local store directory.
        return law.LocalDirectoryTarget(self.local_path(store="$HGC_STORE"))

    def htcondor_wrapper_file(self):
        return os.path.expandvars("$HGC_BASE/hgc/files/bash_wrapper.sh")

    def htcondor_bootstrap_file(self):
        return os.path.expandvars("$HGC_BASE/hgc/files/htcondor_bootstrap.sh")

    def htcondor_use_local_scheduler(self):
        return True

    def htcondor_job_config(self, config, job_num, branches):
        """Apply CERN-specific defaults to the htcondor job config."""
        # render_data is rendered into all files sent with a job.
        config.render_variables["hgc_base"] = os.getenv("HGC_BASE")
        # CERN htcondor fix: pass the true PATH as a render variable; the
        # custom wrapper file uses it to restore PATH.
        config.render_variables["env_path"] = os.getenv("PATH")
        entries = [
            # Force CC7, see
            # http://batchdocs.web.cern.ch/batchdocs/local/submit.html#os-choice
            ("requirements", "(OpSysAndVer =?= \"CentOS7\")"),
            # Copy the entire environment.
            ("getenv", "true"),
            # CERN requires a "log" entry; /dev/null discards it safely —
            # set a meaningful path to inspect batch-system logs.
            ("log", "/dev/null"),
            # Maximum runtime in seconds (one below the cap).
            ("+MaxRuntime", int(math.floor(self.max_runtime * 3600)) - 1),
        ]
        for entry in entries:
            config.custom_content.append(entry)
        # CMS T3 group settings.
        if self.cmst3:
            config.custom_content.append(
                ("+AccountingGroup", "group_u_CMST3.all"))

        return config
class GlobalParams(luigi.Config):
    """Global pipeline configuration shared by all tasks."""

    model_name = luigi.Parameter()  # identifier of the model
    project_folder = luigi.Parameter()  # root folder for project artifacts
    timestep = luigi.FloatParameter()  # simulation timestep — units not shown here; TODO confirm
    dataset_id = luigi.IntParameter()  # numeric id of the dataset to process
    endtime = luigi.FloatParameter()  # simulation end time — units not shown here; TODO confirm
    nb_past_timesteps = luigi.IntParameter()  # presumably the history window length; verify against consumers
    random_seed = luigi.IntParameter()  # seed for reproducibility
Example #22
0
class DataStandardisation(luigi.Task):
    """
    Runs the standardised product workflow.
    """
    level1 = luigi.Parameter()
    outdir = luigi.Parameter()
    granule = luigi.OptionalParameter(default='')
    workflow = luigi.EnumParameter(enum=Workflow, default=Workflow.STANDARD)
    vertices = luigi.TupleParameter(default=(5, 5))
    method = luigi.EnumParameter(enum=Method, default=Method.SHEAR)
    pixel_quality = luigi.BoolParameter()
    land_sea_path = luigi.Parameter()
    aerosol = luigi.DictParameter(default={'user': 0.05})
    brdf = luigi.DictParameter()
    ozone_path = luigi.Parameter(significant=False)
    water_vapour = luigi.DictParameter(default={'user': 1.5},
                                       significant=False)
    dem_path = luigi.Parameter(significant=False)
    ecmwf_path = luigi.Parameter(significant=False)
    invariant_height_fname = luigi.Parameter(significant=False)
    dsm_fname = luigi.Parameter(significant=False)
    modtran_exe = luigi.Parameter(significant=False)
    tle_path = luigi.Parameter(significant=False)
    rori = luigi.FloatParameter(default=0.52, significant=False)
    compression = luigi.EnumParameter(enum=H5CompressionFilter,
                                      default=H5CompressionFilter.LZF,
                                      significant=False)
    filter_opts = luigi.DictParameter(default=None, significant=False)
    acq_parser_hint = luigi.OptionalParameter(default='')
    buffer_distance = luigi.FloatParameter(default=8000, significant=False)
    h5_driver = luigi.OptionalParameter(default='', significant=False)
    normalized_solar_zenith = luigi.FloatParameter(default=45.0)

    def output(self):
        # Name the HDF5 output after the granule, falling back to the
        # level-1 basename when no granule is given.
        label = self.granule if self.granule else basename(self.level1)
        out_fname = '{label}.wagl.h5'.format(label=label)
        return luigi.LocalTarget(pjoin(self.outdir, out_fname))

    def run(self):
        # ECMWF data is only used by the STANDARD and SBT workflows.
        wants_ecmwf = self.workflow in (Workflow.STANDARD, Workflow.SBT)
        ecmwf_path = self.ecmwf_path if wants_ecmwf else None

        # Write via a temporary path so incomplete files are never visible.
        with self.output().temporary_path() as out_fname:
            card4l(self.level1, self.granule, self.workflow, self.vertices,
                   self.method, self.pixel_quality, self.land_sea_path,
                   self.tle_path, self.aerosol, self.brdf,
                   self.ozone_path, self.water_vapour,
                   self.dem_path, self.dsm_fname, self.invariant_height_fname,
                   self.modtran_exe, out_fname, ecmwf_path, self.rori,
                   self.buffer_distance, self.compression, self.filter_opts,
                   self.h5_driver, self.acq_parser_hint, self.normalized_solar_zenith)
Example #23
0
class GetAlignment(SlurmTask):
    """Build a relaxed-PHYLIP alignment from per-locus IUPAC fasta files.

    For each locus shared across all inputs, individuals' coverage
    (fraction of non-N sites) is computed; loci where enough individuals
    are sufficiently covered contribute every third base to the growing
    per-library alignment.
    """
    # Minimum fraction of non-N sites for an individual to count as covered.
    min_cov = luigi.FloatParameter(default=0.8)
    # Minimum fraction of covered individuals for a locus to be kept.
    min_indvs = luigi.FloatParameter(default=0.8)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Set the SLURM request params for this task
        self.mem = 4000
        self.n_cpu = 1
        self.partition = "nbi-short"

    def output(self):
        return {
            'phy':
            LocalTarget(
                os.path.join(self.base_dir, VERSION, PIPELINE,
                             self.output_prefix, self.output_prefix + ".phy")),
        }
        #'nex': LocalTarget(os.path.join(self.base_dir, VERSION, PIPELINE, self.output_prefix, self.output_prefix + ".nex"))}

    def work(self):
        import Bio
        import Bio.SeqIO
        import Bio.AlignIO
        import contextlib
        import numpy as np

        with contextlib.ExitStack() as stack, self.output()['phy'].open(
                'w') as fphy:  #, self.output()['nex'].open('w') as fnex:

            # Keep every input fasta open for lock-step iteration; the
            # ExitStack closes them all when the block exits.
            handles = [
                stack.enter_context(open(target.path))
                for target in self.input()['iupac-codes']
            ]
            loci = zip(*[Bio.SeqIO.parse(fh, 'fasta') for fh in handles])
            alignment = [
                Bio.SeqRecord.SeqRecord(Bio.Seq.Seq(''), id=lib)
                for lib in self.lib_list
            ]

            for records in loci:
                locus_id, locus_len = records[0].id, len(records[0])
                # All files must list loci in the same order.
                assert all(rec.id == locus_id
                           for rec in records), "Fasta sequences not sorted!"

                coverage = 1 - np.array(
                    [rec.seq.count('N') for rec in records]) / locus_len
                covered_fraction = np.mean(coverage > self.min_cov)

                if covered_fraction > self.min_indvs:
                    for (i, rec) in enumerate(records):
                        # 3rd codon
                        alignment[i] += rec.seq[::3]

            Bio.AlignIO.write(Bio.Align.MultipleSeqAlignment(alignment), fphy,
                              'phylip-relaxed')
Example #24
0
class Transmission(luigi.Task):
    """Tabulate photon transmission statistics for one simulation run.

    Reads the ROOT output produced by the ``Run`` dependency and records
    the fraction of generated photons surviving each optical stage
    (after the Fresnel lens, at the focal plane, after the Winston
    cones, and at each SiPM), then pickles the result as a one-row
    pandas DataFrame.
    """
    id = luigi.IntParameter()  # run index, used in the output filename
    name = luigi.Parameter()  # results sub-directory name
    wavelength = luigi.FloatParameter()  # updated from the run output
    dz = luigi.FloatParameter()  # lens displacement; updated from the run output
    n_sipms = luigi.IntParameter()  # number of SiPM channels to read out

    def requires(self):
        return self.clone(Run)

    def run(self):
        df = {}
        # Open output file.
        f = TFile(self.input()[1].fn)
        # Update cached wavelength from the source photon energy.
        # NOTE(review): the 1e-3 factor implies a unit conversion on
        # energyEMin -- confirm against the simulation's energy units.
        gps = f.Get("generalParticleSourceMessenger")
        gps.GetEntry(0)
        self.wavelength = scipy.constants.value("Planck constant in eV s") * scipy.constants.c / (gps.energyEMin * 1e-3)
        df["wavelength"] = [self.wavelength]
        # Get number of particles.
        n = gps.nParticles
        df["n"] = [gps.nParticles]
        # Update cached dz.
        lensModel = f.Get("fresnelLensModel")
        lensModel.GetEntry(0)
        self.dz = lensModel.dz
        z = lensModel.dz + lensModel.f
        df["dz"] = [self.dz]
        df["z"] = [z]
        # Get transmission after lens; Poisson (sqrt(N)) uncertainties throughout.
        lens = f.Get("fresnelLensBackHits")
        df["t_after_lens"] = u.ufloat(lens.GetEntries(), np.sqrt(lens.GetEntries())) / n
        # Get transmission at focal plane.
        focal = f.Get("focalPlaneHits")
        df["t_focal_plane"] = u.ufloat(focal.GetEntries(), np.sqrt(focal.GetEntries())) / n
        # Get transmission after Winston cones.
        wico = f.Get("wicoBackHits")
        df["t_after_wico"] = u.ufloat(wico.GetEntries(), np.sqrt(wico.GetEntries())) / n
        # Get detected photons for each SiPM.
        n_sipm = 0.0
        # FIX: was `xrange`, a Python-2-only builtin (NameError on
        # Python 3); `range` is behaviorally identical here.
        for sipm_id in range(self.n_sipms):
            sipm = f.Get("g4sipmHits-%d" % sipm_id)
            # `!= None` is deliberate: PyROOT null object proxies compare
            # equal to None via ==/!=, which `is not None` would not detect.
            if sipm != None:
                n_sipm += sipm.GetEntries()
                # Only the first SiPM gets its own per-channel column.
                if sipm_id > 0:
                    continue
                df["t_sipm_%d" % sipm_id] = u.ufloat(sipm.GetEntries(), np.sqrt(sipm.GetEntries())) / n
        df["t_sipm"] = u.ufloat(n_sipm, np.sqrt(n_sipm)) / n
        f.Close()
        # Save to pandas dataframe
        df = pd.DataFrame(df)
        df.to_pickle(self.output().fn)

    def output(self):
        return luigi.LocalTarget("./results/%s/famous-%d.pkl" % (self.name, self.id))
Example #25
0
class SurfaceReflectance(luigi.Task):
    """Run the terrain correction over a given band."""

    band_name = luigi.Parameter()
    rori = luigi.FloatParameter(default=0.52, significant=False)
    base_dir = luigi.Parameter(default='_standardised', significant=False)
    dsm_fname = luigi.Parameter(significant=False)
    buffer_distance = luigi.FloatParameter(default=8000, significant=False)

    def requires(self):
        # Every upstream product consumed by the reflectance calculation.
        return {
            'interpolation': self.clone(InterpolateCoefficients),
            'ancillary': self.clone(AncillaryData),
            'rel_slope': self.clone(RelativeAzimuthSlope),
            'shadow': self.clone(CalculateShadowMasks),
            'slp_asp': self.clone(SlopeAndAspect),
            'incident': self.clone(IncidentAngles),
            'exiting': self.clone(ExitingAngles),
            'sat_sol': self.clone(CalculateSatelliteAndSolarGrids)
        }

    def output(self):
        out_path = pjoin(self.work_root, self.group, self.base_dir)
        return luigi.LocalTarget(
            pjoin(out_path, 'reflectance-{}.h5'.format(self.band_name)))

    def run(self):
        container = acquisitions(self.level1, self.acq_parser_hint)
        acqs = container.get_acquisitions(self.group, self.granule)

        # Resolve every upstream target to its filesystem path.
        fnames = {key: target.path for key, target in self.input().items()}

        # get the acquisition we wish to process
        acq = [a for a in acqs if a.band_name == self.band_name][0]

        with self.output().temporary_path() as out_fname:
            _calculate_reflectance(acq, acqs, fnames['interpolation'],
                                   fnames['sat_sol'], fnames['slp_asp'],
                                   fnames['rel_slope'], fnames['incident'],
                                   fnames['exiting'], fnames['shadow'],
                                   fnames['ancillary'], self.rori, out_fname,
                                   self.compression, self.filter_opts)
Example #26
0
class MergeImgByBounds(luigi.Task):
    """
    Download every map tile covering the given lat/lon bounding box at
    the requested zoom level and paste them into one combined PNG.
    """
    baseUrl = luigi.Parameter()
    baseName = luigi.Parameter(default="output")
    west = luigi.FloatParameter()
    north = luigi.FloatParameter()
    south = luigi.FloatParameter()
    east = luigi.FloatParameter()
    zoom = luigi.IntParameter()

    def __init__(self, *args, **kwargs):
        super(MergeImgByBounds, self).__init__(*args, **kwargs)

        # Tile indices of the north-west and south-east corners.
        nw_x, nw_y, _, _ = deg_to_num(self.north, self.west, self.zoom)
        se_x, se_y, _, _ = deg_to_num(self.south, self.east, self.zoom)
        self.edge_nw_x = nw_x
        self.edge_nw_y = nw_y
        self.edge_se_x = se_x
        self.edge_se_y = se_y

        # One download task per tile; the grid is padded by one extra
        # tile on the north-west edge.
        self.require_list = []
        for col, tile_x in enumerate(range(nw_x - 1, se_x + 1)):
            for row, tile_y in enumerate(range(nw_y - 1, se_y + 1)):
                tile_task = DownloadTile(self.baseUrl, self.baseName,
                                         tile_x, tile_y, self.zoom)
                self.require_list.append((col, row, tile_task))

    def requires(self):
        """Return the scheduled per-tile download tasks."""
        return [task for _, _, task in self.require_list]

    def output(self):
        return luigi.LocalTarget("./var/combined_z{}_x{}_{}_y{}_{}.png".format(self.zoom, self.edge_nw_x, self.edge_se_x, self.edge_nw_y, self.edge_se_y))

    def run(self):
        # Canvas sized to the padded tile grid (256 px per tile).
        width = 256 * (self.edge_se_x - self.edge_nw_x + 2)
        height = 256 * (self.edge_se_y - self.edge_nw_y + 2)
        canvas = Image.new('RGBA', (width, height), (255, 255, 255, 255))

        for col, row, tile_task in self.require_list:
            tile_img = Image.open(tile_task.output().fn)
            canvas.paste(tile_img, (256 * col, 256 * row))
        with self.output().open("wb") as output_f:
            canvas.save(output_f, 'PNG')
Example #27
0
class RunFmask(luigi.Task):

    """
    Execute the Fmask algorithm for a given granule.
    """

    level1 = luigi.Parameter()
    granule = luigi.Parameter()
    workdir = luigi.Parameter()
    cloud_buffer_distance = luigi.FloatParameter(default=150.0)
    cloud_shadow_buffer_distance = luigi.FloatParameter(default=300.0)
    parallax_test = luigi.BoolParameter()
    upstream_settings = luigi.DictParameter(default={})
    acq_parser_hint = luigi.OptionalParameter(default="")

    def requires(self):
        # for the time being have fmask require wagl,
        # no point in running fmask if wagl fails...
        # return WorkDir(self.level1, dirname(self.workdir))
        return DataStandardisation(
            self.level1,
            self.workdir,
            self.granule,
            **self.upstream_settings,  # pylint: disable=not-a-mapping
        )

    def output(self):
        """Two targets: the fmask image and its yaml metadata."""
        image_fname = pjoin(self.workdir, "{}.fmask.img".format(self.granule))
        metadata_fname = pjoin(self.workdir, "{}.fmask.yaml".format(self.granule))
        return {
            "image": luigi.LocalTarget(image_fname),
            "metadata": luigi.LocalTarget(metadata_fname),
        }

    def run(self):
        targets = self.output()
        # Both outputs are written via temporary paths so a failed run
        # leaves neither behind.
        with targets["image"].temporary_path() as img_fname, \
                targets["metadata"].temporary_path() as meta_fname:
            fmask(
                self.level1,
                self.granule,
                img_fname,
                meta_fname,
                self.workdir,
                self.acq_parser_hint,
                self.cloud_buffer_distance,
                self.cloud_shadow_buffer_distance,
                self.parallax_test,
            )
Example #28
0
class TrainDevTestSplits(sciluigi.Task):
    """Split a parallel corpus into train/dev/analysis portions.

    Reads the paired source/target files from ``in_processed`` and
    writes six files under ``data/translate/splits/`` according to the
    configured fractions, which must sum to 1.
    """
    # Split fractions; they must sum to 1.0.
    train = luigi.FloatParameter(default=0.87)
    dev = luigi.FloatParameter(default=0.003)
    analysis = luigi.FloatParameter(default=0.127)

    # Upstream (source, target) file pair, injected by the workflow.
    in_processed = None

    def out_splits(self):
        """Return the six output targets: (source, target) per split."""
        return [
            TargetInfo(self, 'data/translate/splits/train.source'),
            TargetInfo(self, 'data/translate/splits/train.target'),
            TargetInfo(self, 'data/translate/splits/dev.source'),
            TargetInfo(self, 'data/translate/splits/dev.target'),
            TargetInfo(self, 'data/translate/splits/analysis.source'),
            TargetInfo(self, 'data/translate/splits/analysis.target')
        ]

    def run(self):
        self.ex('mkdir -p data/translate/splits')
        self.ex('rm data/translate/splits/* || true')

        # FIX: tolerant comparison instead of exact `== 1.` — float
        # fractions that sum to 1 only up to rounding must not abort.
        assert abs(self.train + self.dev + self.analysis - 1.) < 1e-9

        # FIX: count lines without leaking the file handle.
        with open(self.in_processed[0].path) as fin:
            lines = sum(1 for line in fin)
        split_counts = [
            0,
            int(lines * self.train),
            int(lines * self.dev),
            int(lines * self.analysis)
        ]
        # Cumulative boundaries: lines [0, idx[1]) -> train,
        # [idx[1], idx[2]) -> dev, the remainder -> analysis.
        split_idx = list(accumulate(split_counts))
        logging.info('Rough counts of train/dev/analysis sizes: \n\t%s\n\t%s' % \
                (str(split_counts), str(split_idx)))

        out_files = [
            open(out_file.path, 'wt') for out_file in self.out_splits()
        ]
        try:
            train_src, train_trg, dev_src, dev_trg, analysis_src, analysis_trg = out_files

            # FIX: input handles are now closed via `with` instead of
            # being leaked.
            with open(self.in_processed[0].path) as src_f, \
                    open(self.in_processed[1].path) as trg_f:
                for i, (src, trg) in enumerate(zip(src_f, trg_f)):
                    if split_idx[1] > i:
                        train_src.write(src)
                        train_trg.write(trg)
                    elif split_idx[2] > i:
                        dev_src.write(src)
                        dev_trg.write(trg)
                    else:
                        analysis_src.write(src)
                        analysis_trg.write(trg)
        finally:
            # FIX: guaranteed close (was a side-effect list comprehension
            # with no exception safety).
            for fh in out_files:
                fh.close()

        call('wc -l data/translate/splits/*', shell=True)
Example #29
0
class ClusterL1C2(luigi.Task):
    """Level 1 clustering of a single tissue pool."""
    tissue = luigi.Parameter()
    a = luigi.FloatParameter(default=1)
    b = luigi.FloatParameter(default=10)
    c = luigi.FloatParameter(default=1)
    d = luigi.FloatParameter(default=10)
    n_factors = luigi.IntParameter(default=100)
    k_smoothing = luigi.IntParameter(default=100)
    k = luigi.IntParameter(default=10)
    log = luigi.BoolParameter(default=True)
    normalize = luigi.BoolParameter(default=True)
    accel = luigi.BoolParameter(default=False)

    def requires(self) -> luigi.Task:
        return am.PrepareTissuePool(tissue=self.tissue)

    def output(self) -> luigi.Target:
        # Filename encodes every hyperparameter so distinct settings
        # map to distinct targets.
        return luigi.LocalTarget(
            os.path.join(
                am.paths().build,
                f"L1_{self.tissue}_nfactors={self.n_factors}_k={self.k}_ksmoothing={self.k_smoothing}_a={self.a}_b={self.b}_c={self.c}_d={self.d}_log={self.log}_normalize={self.normalize}_accel={self.accel}.loom"
            ))

    def run(self) -> None:
        logging = cg.logging(self)
        with self.output().temporary_path() as out_file:
            # Copy only columns flagged as valid into the new loom file.
            with loompy.connect(self.input().fn, mode="r") as ds:
                logging.info("Collecting valid cells")
                valid_cells = np.where(ds.col_attrs["_Valid"] == 1)[0]
                for (_, _, view) in ds.scan(items=valid_cells,
                                            axis=1,
                                            key="Accession"):
                    loompy.create_append(out_file, view.layers, view.ra,
                                         view.ca)

            with loompy.connect(out_file) as ds:
                logging.info(f"Found {ds.shape[1]} valid cells")
                logging.info("Learning the manifold")
                clusterer = cg.Cytograph2(accel=self.accel,
                                          log=self.log,
                                          normalize=self.normalize,
                                          a=self.a,
                                          b=self.b,
                                          c=self.c,
                                          d=self.d,
                                          k=self.k,
                                          k_smoothing=self.k_smoothing,
                                          n_factors=self.n_factors,
                                          max_iter=200)
                clusterer.fit(ds)
Example #30
0
class GlobalParams(luigi.Config):
    """Central luigi configuration shared across the pipeline's tasks.

    Parameters are grouped by the task that consumes them; see the
    section comments below. All un-defaulted parameters must be supplied
    via luigi configuration.
    """

    model_name = luigi.Parameter()
    project_folder = luigi.Parameter()
    timestep = luigi.FloatParameter()  # simulation time step (units not shown here — TODO confirm)
    dataset_id = luigi.IntParameter()
    endtime = luigi.FloatParameter()  # simulation end time
    nb_past_timesteps = luigi.IntParameter()
    params_to_randomize = luigi.Parameter()
    nb_randomized_params = luigi.IntParameter()

    random_seed = luigi.IntParameter()

    # GenerateDataset
    nb_settings = luigi.IntParameter()
    nb_trajectories = luigi.IntParameter()

    # FormatDataset
    positivity = luigi.Parameter()
    test_fraction = luigi.FloatParameter()  # fraction of data held out for testing
    save_format = luigi.Parameter()

    # GenerateHistogramData
    nb_histogram_settings = luigi.IntParameter()
    nb_histogram_trajectories = luigi.IntParameter()
    histogram_endtime = luigi.FloatParameter()

    # Train
    model_id = luigi.IntParameter()
    nb_features = luigi.IntParameter()
    body_config_path = luigi.Parameter()
    mixture_config_path = luigi.Parameter()
    batch_size = luigi.IntParameter(default=256)
    add_noise = luigi.Parameter(default='false')  # string flag, not a BoolParameter — presumably parsed downstream; verify
    stddev = luigi.FloatParameter(default=0.01)  # noise stddev, used when add_noise is enabled — TODO confirm

    # TrainStatic
    n_epochs = luigi.IntParameter(default=100)

    # TrainSearch
    n_epochs_main = luigi.IntParameter(default=100)
    n_epochs_heat_up = luigi.IntParameter(default=20)
    n_epochs_arch = luigi.IntParameter(default=5)
    n_epochs_interval = luigi.IntParameter(default=5)
    n_epochs_finetune = luigi.IntParameter(default=30)

    # Evaluate
    distance_kind = luigi.Parameter(default='iou')
    target_species_names = luigi.Parameter(default='')
    time_lag_range = luigi.Parameter(default='10')
    settings_idxs_to_save_histograms = luigi.Parameter(default='0')