import pyinter


def get_gap_overlap_positions(path, blocks, read_len, min_mappable=20):
    blocks_gaps = genome_blocks_gaps(blocks, path)
    m = min_mappable
    gap_ref = pyinter.IntervalSet()
    ref = pyinter.IntervalSet()
    pos = 0
    for b in blocks_gaps:
        if len(b) == 0:
            continue
        if not b.is_insertion():
            gap_ref.add(pyinter.closedopen(pos, pos + len(b)))
        if not b.is_gap:
            ref.add(pyinter.closedopen(pos, pos + len(b)))
        pos += len(b)
    A1 = pyinter.IntervalSet()  # i: [i, i+m) contained in gap_ref
    A2 = pyinter.IntervalSet()  # i: [i, i+m) overlaps ref
    for iv in gap_ref:
        if iv.lower_value <= iv.upper_value - m:
            A1.add(pyinter.closed(iv.lower_value, iv.upper_value - m))
    for iv in ref:
        A2.add(pyinter.closed(iv.lower_value - m + 1, iv.upper_value - 1))
    A3 = A1.intersection(A2)
    A4 = pyinter.IntervalSet()
    A5 = pyinter.IntervalSet()
    for iv in A1:
        A4.add(pyinter.closed(iv.lower_value - read_len + m, iv.upper_value))
    for iv in A3:
        A5.add(pyinter.closed(iv.lower_value - read_len + m, iv.upper_value))
    result = A4.difference(A5)
    # Normalize every interval to an open interval and drop any empty ones.
    out = pyinter.IntervalSet()
    for iv in result:
        a = iv.lower_value - 1 if iv.lower_value in iv else iv.lower_value
        b = iv.upper_value + 1 if iv.upper_value in iv else iv.upper_value
        if a < b - 1:
            out.add(pyinter.open(a, b))
    return out
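# Hedged sketch (not from the original source): a minimal illustration of the
# pyinter operations that get_gap_overlap_positions relies on -- building
# IntervalSets, intersecting them, and taking set differences. The interval
# endpoints below are arbitrary.
import pyinter

gap_like = pyinter.IntervalSet([pyinter.closedopen(0, 100)])
ref_like = pyinter.IntervalSet([pyinter.closedopen(40, 60)])

both = gap_like.intersection(ref_like)    # positions covered by both sets
only_gap = gap_like.difference(ref_like)  # positions covered only by gap_like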
def test_persons_availability(self):
    avail = self.p.get_availability(self.range_start, self.range_finish)  # type: inter.IntervalSet
    expected = inter.IntervalSet([
        inter.closed(1491354000, 1491368400),  # Wed, 05 Apr 2017 01:00:00 to 05:00:00 GMT
        # inter.closed(1491958800, 1491973200),  # Wed, 12 Apr 2017 01:00:00 to 05:00:00 GMT (Second Tuesday!)
        inter.closed(1492563600, 1492578000),  # Wed, 19 Apr 2017 01:00:00 to 05:00:00 GMT
        inter.closed(1493168400, 1493182800),  # Wed, 26 Apr 2017 01:00:00 to 05:00:00 GMT
    ])
    self.assertEqual(avail, expected)
def get_availability(self: ConcreteEntity, range_begin: date, range_end: date) -> IntervalSet:
    available = IntervalSet([])
    unavailable = IntervalSet([])

    # Determine availability and unavailability according to the entity's settings:

    for pos_tp in self.timepattern_set.filter(disposition=TimePattern.DISPOSITION_AVAILABLE).all():
        available += pos_tp.as_interval_set(range_begin, range_end)

    if available.empty():
        # If no positive timepatterns have been specified then we'll say that the pos part is infinite.
        # This makes it easy to specify things like "always available" and "always available except Fridays."
        # For the purpose of this method, "infinite" translates to range_begin to range_end.
        make_ts = lambda d: dt2ts(pytz.utc.localize(datetime(d.year, d.month, d.day)))
        range_begin_ts = make_ts(range_begin)  # type: TimeStamp
        range_end_ts = make_ts(range_end)  # type: TimeStamp
        available.add(closed(range_begin_ts, range_end_ts))

    for neg_tp in self.timepattern_set.filter(disposition=TimePattern.DISPOSITION_UNAVAILABLE).all():
        unavailable += neg_tp.as_interval_set(range_begin, range_end)

    # Determine additional unavailability due to the entity being involved in a scheduled class:

    for involvement in self.scheduled_class_involvements:  # type: EntityInScheduledClass
        if involvement.entitys_status == EntityInScheduledClass.STATUS_G2G:
            pass  # TODO

    return available - unavailable
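# Hedged sketch (not part of the original module) of the set algebra behind
# get_availability: positive time patterns accumulate into "available",
# negative ones into "unavailable", and the result is their difference.
# Timestamps are arbitrary illustrative values.
from pyinter import IntervalSet, closed

available = IntervalSet([closed(1491354000, 1491368400)])    # one 4-hour block
unavailable = IntervalSet([closed(1491360000, 1491363600)])  # a 1-hour conflict inside it
remaining = available - unavailable                          # availability minus conflicts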
def test_interval_set(self):
    iset = self.tp.as_interval_set(self.range_start, self.range_finish)
    expected = inter.IntervalSet([
        inter.closed(1491354000, 1491368400),  # Wed, 05 Apr 2017 01:00:00 to 05:00:00 GMT
        inter.closed(1491958800, 1491973200),  # Wed, 12 Apr 2017 01:00:00 to 05:00:00 GMT
        inter.closed(1492563600, 1492578000),  # Wed, 19 Apr 2017 01:00:00 to 05:00:00 GMT
        inter.closed(1493168400, 1493182800),  # Wed, 26 Apr 2017 01:00:00 to 05:00:00 GMT
    ])
    self.assertEqual(iset, expected)
def instantiate_class(ct: ClassTemplate, dt: datetime) -> ScheduledClass:
    assert dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None  # i.e. dt is not naive
    eicts = []
    for pict in ct.personinclasstemplate_set.all():
        eicts.append(pict)
    for rict in ct.resourceinclasstemplate_set.all():
        eicts.append(rict)
    begin_ts = dt2ts(dt)
    ival = inter.closed(begin_ts, begin_ts + 3600)  # one-hour class interval, in Unix timestamps
    ga = GroupAvailability(eicts, ival)
    return ct.instantiate(ga)
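# Hedged usage sketch: instantiate_class insists on a timezone-aware datetime,
# so callers localize before passing it in. "some_class_template" is a
# hypothetical ClassTemplate instance, not something defined in this module.
import pytz
from datetime import datetime

aware_dt = pytz.utc.localize(datetime(2017, 4, 5, 1, 0))  # aware, so the assert passes
# scheduled = instantiate_class(some_class_template, aware_dt)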
def as_interval_set(self, start: date, finish: date) -> IntervalSet:
    """
    Return an interval set representation of the TimePattern between two bounds.
    The intervals are closed and expressed using Unix timestamps (the number of
    seconds since 1970-01-01 UTC, not counting leap seconds).

    Since TimePattern defines an infinite sequence of intervals across all time,
    this function takes a starting date and an ending date. Only those intervals
    with start times that fall between the starting and ending date are returned
    in the interval set result.
    """
    dow_dict = {"Mo": 0, "Tu": 1, "We": 2, "Th": 3, "Fr": 4, "Sa": 5, "Su": 6}

    # REVIEW: Is there a more efficient implementation that uses dateutil.rrule?

    tz = timezone.get_current_timezone()
    iset = IntervalSet([])  # type: IntervalSet
    d = start - timedelta(days=1)  # type: date
    while d < finish:
        # Advance at the top of the loop so the "continue"s below are safe;
        # the dates processed run from start through finish inclusive.
        d = d + timedelta(days=1)
        if dow_dict[self.dow] != d.weekday():
            continue
        if self.wom == self.WOM_LAST:
            if not is_last_xxxday_of_month(d):
                continue
        if self.wom not in [self.WOM_LAST, self.WOM_EVERY]:
            nth = int(self.wom)
            if not is_nth_xxxday_of_month(d, nth):
                continue
        am_pm_adjust = 0 if self.morning else 12
        inter_start_dt = tz.localize(
            datetime(d.year, d.month, d.day, self.hour + am_pm_adjust, self.minute))
        inter_start = dt2ts(inter_start_dt)
        inter_end = int(inter_start + self.duration * 3600)
        iset.add(closed(inter_start, inter_end))
    return iset
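# Hedged sketch of a dt2ts-style helper as assumed by as_interval_set: it
# converts a timezone-aware datetime to a Unix timestamp. The project's real
# dt2ts is defined elsewhere; this only illustrates the convention the
# docstring describes (seconds since 1970-01-01 UTC).
import calendar
from datetime import datetime, timezone as dt_timezone

def dt2ts_sketch(dt: datetime) -> int:
    # Normalize to UTC, then convert the UTC time tuple to a Unix timestamp.
    return calendar.timegm(dt.astimezone(dt_timezone.utc).timetuple())

# dt2ts_sketch(datetime(2017, 4, 5, 1, 0, tzinfo=dt_timezone.utc)) == 1491354000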
import numpy as np
import pyinter


def constraints_unknown_sigma(support_directions,
                              RHS_offsets,
                              LHS_offsets,
                              observed_data,
                              direction_of_interest,
                              RSS,
                              RSS_df,
                              value_under_null=0.,
                              tol=1.e-4,
                              DEBUG=False):
    r"""
    Given a quasi-affine constraint $\{z:Az+u \leq \hat{\sigma}b\}$
    (elementwise), specified with $A$ as `support_directions`, $b$ as
    `RHS_offsets`, and $u$ as `LHS_offsets`, a new direction of interest
    $\eta$, and an `observed_data` that is a Gaussian vector
    $Z \sim N(\mu,\sigma^2 I)$ with $\sigma$ unknown, this function returns
    $\eta^TZ$ as well as a set bounding this value.

    The value of $\hat{\sigma}$ is taken to be sqrt(RSS/RSS_df).

    The interval constructed is such that the endpoints are independent of
    $\eta^TZ$, hence the selective $T$ distribution of `sample carving`_
    can be used to form an exact pivot.

    To construct the interval, we are in effect conditioning on all randomness
    perpendicular to the direction of interest, i.e. $P_{\eta}^{\perp}X$ where
    $X$ is the Gaussian data vector.

    Notes
    -----
    Covariance is assumed to be an unknown multiple of the identity.

    Parameters
    ----------
    support_directions : np.float
        Matrix specifying constraint, $A$.
    RHS_offsets : np.float
        Offset in RHS of constraint, $b$.
    LHS_offsets : np.float
        Offset in LHS of constraint, $u$.
    observed_data : np.float
        Observations.
    direction_of_interest : np.float
        Direction in which we're interested for the contrast.
    RSS : float
        Residual sum of squares.
    RSS_df : int
        Degrees of freedom of RSS.
    tol : float
        Relative tolerance parameter for deciding sign of $Az-b$.

    Returns
    -------
    truncation_set : pyinter.IntervalSet
        Intersection of the per-constraint feasible intervals.
    Tobs : float
        Observed value of the $T$ statistic.
    """
    # shorthand
    A, b, L, X, w, theta = (support_directions,
                            RHS_offsets,
                            LHS_offsets,
                            observed_data,
                            direction_of_interest,
                            value_under_null)

    # make direction of interest a unit vector
    normw = np.linalg.norm(w)
    w = w / normw
    theta = theta / normw

    sigma_hat = np.sqrt(RSS / RSS_df)

    # compute the sufficient statistics
    U = (w * X).sum() - theta
    V = X - (X * w).sum() * w
    W = sigma_hat**2 * RSS_df + U**2
    Tobs = U / np.sqrt((W - U**2) / RSS_df)
    sqrtW = np.sqrt(W)

    alpha = np.dot(A, w)
    gamma = theta * alpha + np.dot(A, V) + L

    Anorm = np.fabs(A).max()

    # solve each row's inequality and collect the feasible intervals
    intervals = []
    for _a, _b, _c in zip(alpha, b, gamma):
        _a = _a * sqrtW
        _b = _b * sqrtW
        cur_intervals = sqrt_inequality_solver(_a, _c, _b, RSS_df)
        intervals.append(pyinter.IntervalSet(
            [pyinter.closed(*i) for i in cur_intervals if i]))

    # the truncation set is the intersection over all constraint rows
    truncation_set = intervals[0]
    for interv in intervals[1:]:
        truncation_set = truncation_set.intersection(interv)
    if not truncation_set:
        raise ValueError("empty truncation intervals")
    return truncation_set, Tobs
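# Hedged sketch (not from the original source) of the truncation-set
# construction used above: each constraint row yields a pyinter.IntervalSet of
# feasible values, and the final truncation set is the intersection over all
# rows. The endpoints below are arbitrary.
import pyinter

row_sets = [
    pyinter.IntervalSet([pyinter.closed(-5.0, 1.0)]),
    pyinter.IntervalSet([pyinter.closed(-2.0, 3.0)]),
]
truncation_set = row_sets[0]
for s in row_sets[1:]:
    truncation_set = truncation_set.intersection(s)
# truncation_set now covers the overlap [-2.0, 1.0]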
import pyinter


def get_time_intervals(time_points):
    # pairwise yields consecutive overlapping pairs of time points
    # (itertools.pairwise, or the classic recipe on older Pythons).
    inter_lst = [pyinter.closed(start, end) for start, end in pairwise(time_points)]
    return pyinter.IntervalSet(inter_lst)
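# Hedged usage sketch: get_time_intervals turns consecutive time points into
# closed intervals. The time points below are arbitrary illustrative values.
intervals = get_time_intervals([0, 10, 20])
# -> an IntervalSet containing pyinter.closed(0, 10) and pyinter.closed(10, 20)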