def __init__(self, gtf_file, landmarks=ALL_LANDMARKS, use_strand=True, **kwargs):
    """Initialize the extractor and pre-index the landmark tables.

    Args:
        gtf_file: forwarded to the parent initializer and to
            `extract_landmarks`; also kept as `self._gtf_file`.
        landmarks: landmark names handed to `extract_landmarks`; also
            stored as `self.columns`.
        use_strand: stored unchanged as `self.use_strand`.
        **kwargs: forwarded to the parent initializer.
    """
    super(DistToClosestLandmarkExtractor, self).__init__(gtf_file, **kwargs)
    self._gtf_file = gtf_file

    extracted = extract_landmarks(gtf_file, landmarks=landmarks)

    # Column names; required for concatenating distances into an array.
    self.columns = landmarks
    self.use_strand = use_strand

    # Index every table by chromosome ("seqname") and strand - faster access.
    indexed = {}
    for name, table in six.iteritems(extracted):
        indexed[name] = table.set_index(["seqname", "strand"])
    self.landmarks = indexed
def test_extract_landmarks(gtf):
    """Check that `extract_landmarks` returns a dict for the `gtf` input."""
    result = extract_landmarks(gtf)
    assert isinstance(result, dict)