Esempio n. 1
0
def get_gap_overlap_positions(path, blocks, read_len, min_mappable=20):
    """Return an ``IntervalSet`` of open intervals derived from gap/overlap windows.

    The blocks returned by ``genome_blocks_gaps(blocks, path)`` are laid end
    to end starting at coordinate 0 (zero-length blocks are skipped).  Two
    interval sets are collected from them:

    * ``with_gaps``  -- every block for which ``is_insertion()`` is false;
    * ``without_gaps`` -- the subset of those whose ``is_gap`` is falsy.

    With ``m = min_mappable`` the function then builds

    * ``starts_inside``   -- i such that [i, i+m) is contained in ``with_gaps``;
    * ``starts_touching`` -- i such that [i, i+m) overlaps ``without_gaps``;
    * ``starts_both``     -- the intersection of the two,

    widens ``starts_inside`` and ``starts_both`` to the left by
    ``read_len - m`` and returns the first widened set minus the second,
    re-expressed as open intervals with empty ones dropped.

    :param path: forwarded to ``genome_blocks_gaps`` as its second argument.
    :param blocks: forwarded to ``genome_blocks_gaps`` as its first argument.
    :param read_len: read length used for the leftward widening.
    :param min_mappable: window length ``m`` (default 20).
    :return: ``pyinter.IntervalSet`` containing only open intervals.
    """
    window = min_mappable

    def _widen_left(source, amount):
        # Copy ``source``, moving every lower bound ``amount`` to the left.
        widened = pyinter.IntervalSet()
        for span in source:
            widened.add(pyinter.closed(span.lower_value - amount,
                                       span.upper_value))
        return widened

    with_gaps = pyinter.IntervalSet()
    without_gaps = pyinter.IntervalSet()
    cursor = 0
    for block in genome_blocks_gaps(blocks, path):
        length = len(block)
        if length == 0:
            continue
        if not block.is_insertion():
            with_gaps.add(pyinter.closedopen(cursor, cursor + length))
            # NOTE(review): ``is_gap`` is read as an attribute whereas
            # ``is_insertion`` is called -- confirm ``is_gap`` is not a method.
            if not block.is_gap:
                without_gaps.add(pyinter.closedopen(cursor, cursor + length))
        # Insertions are excluded from both sets but still advance the cursor.
        cursor += length

    starts_inside = pyinter.IntervalSet()    # i: [i, i+m) contained in with_gaps
    for span in with_gaps:
        last_start = span.upper_value - window
        if span.lower_value <= last_start:
            starts_inside.add(pyinter.closed(span.lower_value, last_start))

    starts_touching = pyinter.IntervalSet()  # i: [i, i+m) overlaps without_gaps
    for span in without_gaps:
        starts_touching.add(pyinter.closed(span.lower_value - window + 1,
                                           span.upper_value - 1))

    starts_both = starts_inside.intersection(starts_touching)

    reach = read_len - window
    candidates = _widen_left(starts_inside, reach)
    excluded = _widen_left(starts_both, reach)
    remaining = candidates.difference(excluded)

    # Convert every interval to an open one (a closed bound is pushed outward
    # by one) and keep only those that are not empty.
    opened = pyinter.IntervalSet()
    for span in remaining:
        lo = span.lower_value
        hi = span.upper_value
        if lo in span:
            lo = lo - 1
        if hi in span:
            hi = hi + 1
        if lo < hi - 1:
            opened.add(pyinter.open(lo, hi))
    return opened
Esempio n. 2
0
 def test_persons_availability(self):
     """The person's availability over the fixture range equals the expected slots."""
     # Expected slots as (begin, end) Unix timestamps; each is 01:00:00 to
     # 05:00:00 GMT on the given day.
     expected_bounds = [
         (1491354000, 1491368400),  # Wed, 05 Apr 2017
         # (1491958800, 1491973200) -- Wed, 12 Apr 2017 is intentionally
         # absent from the expectation ("Second Tuesday!").
         (1492563600, 1492578000),  # Wed, 19 Apr 2017
         (1493168400, 1493182800),  # Wed, 26 Apr 2017
     ]
     expected = inter.IntervalSet(
         [inter.closed(begin, end) for begin, end in expected_bounds])

     actual = self.p.get_availability(
         self.range_start, self.range_finish)  # type: inter.IntervalSet

     self.assertEqual(actual, expected)
Esempio n. 3
0
    def get_availability(self: ConcreteEntity, range_begin: date,
                         range_end: date) -> IntervalSet:
        """Return the entity's availability between two dates as an interval set.

        The result is the union of the interval sets of all time patterns
        whose disposition is AVAILABLE, minus the union of those whose
        disposition is UNAVAILABLE.  When there are no AVAILABLE intervals,
        the whole span from ``range_begin`` to ``range_end`` is treated as
        available before the subtraction.

        :param range_begin: first date of the range to evaluate.
        :param range_end: last date of the range to evaluate.
        :return: ``available - unavailable`` as an ``IntervalSet``.
        """

        def _midnight_utc_ts(day):
            # Timestamp (via dt2ts) of 00:00 UTC on the given date.
            return dt2ts(
                pytz.utc.localize(datetime(day.year, day.month, day.day)))

        # --- Positive part: what the entity's settings declare as available.
        available = IntervalSet([])
        positive_patterns = self.timepattern_set.filter(
            disposition=TimePattern.DISPOSITION_AVAILABLE).all()
        for pattern in positive_patterns:
            available += pattern.as_interval_set(range_begin, range_end)

        if available.empty():
            # No positive time patterns means "always available", which keeps
            # settings such as "always available except Fridays" simple.
            # Within this method, "always" is the span range_begin..range_end.
            span_begin_ts = _midnight_utc_ts(range_begin)  # type: TimeStamp
            span_end_ts = _midnight_utc_ts(range_end)  # type: TimeStamp
            available.add(closed(span_begin_ts, span_end_ts))

        # --- Negative part: what the entity's settings declare as unavailable.
        unavailable = IntervalSet([])
        negative_patterns = self.timepattern_set.filter(
            disposition=TimePattern.DISPOSITION_UNAVAILABLE).all()
        for pattern in negative_patterns:
            unavailable += pattern.as_interval_set(range_begin, range_end)

        # --- Scheduled classes: not implemented yet, so involvements with
        # status G2G currently contribute nothing to the result.
        for involvement in self.scheduled_class_involvements:  # type: EntityInScheduledClass
            if involvement.entitys_status == EntityInScheduledClass.STATUS_G2G:
                pass  # TODO

        return available - unavailable
Esempio n. 4
0
 def test_interval_set(self):
     """The time pattern's interval set over the fixture range equals the expected slots."""
     # Expected slots as (begin, end) Unix timestamps; each is 01:00:00 to
     # 05:00:00 GMT on the given day.
     expected_bounds = [
         (1491354000, 1491368400),  # Wed, 05 Apr 2017
         (1491958800, 1491973200),  # Wed, 12 Apr 2017
         (1492563600, 1492578000),  # Wed, 19 Apr 2017
         (1493168400, 1493182800),  # Wed, 26 Apr 2017
     ]
     expected = inter.IntervalSet(
         [inter.closed(begin, end) for begin, end in expected_bounds])

     actual = self.tp.as_interval_set(self.range_start, self.range_finish)

     self.assertEqual(actual, expected)
Esempio n. 5
0
def instantiate_class(ct: ClassTemplate, dt: datetime) -> ScheduledClass:
    """Instantiate a scheduled class from a template at the given start time.

    Collects the template's person and resource entries (persons first),
    wraps them with a closed interval that begins at ``dt`` and ends 3600
    seconds later in a ``GroupAvailability``, and passes that to
    ``ct.instantiate``.

    :param ct: the class template to instantiate.
    :param dt: timezone-aware start time; a naive datetime fails the assertion.
    :return: whatever ``ct.instantiate`` returns.
    """
    # Reject naive datetimes: both tzinfo and its utcoffset must be set.
    tz_info = dt.tzinfo
    assert tz_info is not None and tz_info.utcoffset(dt) is not None

    participants = list(ct.personinclasstemplate_set.all())
    participants.extend(ct.resourceinclasstemplate_set.all())

    start_ts = dt2ts(dt)
    slot = inter.closed(start_ts, start_ts + 3600)
    return ct.instantiate(GroupAvailability(participants, slot))
Esempio n. 6
0
    def as_interval_set(self, start: date, finish: date) -> IntervalSet:
        """
            Return an interval set representation of the TimePattern between two bounds.

            The intervals are closed and expressed using Unix timestamps (the number of seconds
            since 1970-01-01 UTC, not counting leap seconds). Since TimePattern defines an infinite
            sequence of intervals across all time, this function takes a starting date and ending date.
            Only those intervals that start on a day from ``start`` through ``finish`` (both
            inclusive) are returned in the interval set result. The result is empty when
            ``finish`` is earlier than ``start``.
        """
        dow_dict = {
            "Mo": 0,
            "Tu": 1,
            "We": 2,
            "Th": 3,
            "Fr": 4,
            "Sa": 5,
            "Su": 6
        }

        # REVIEW: Is there a more efficient implementation that uses dateutil.rrule?

        tz = timezone.get_current_timezone()
        iset = IntervalSet([])  # type: IntervalSet

        # Visit every day from start through finish exactly once.  (The earlier
        # "decrement, test, then increment" loop also visited finish + 1 day.)
        # range() is empty when finish precedes start.
        for day_offset in range((finish - start).days + 1):
            d = start + timedelta(days=day_offset)  # type: date

            # Wrong day of the week for this pattern.
            if dow_dict[self.dow] != d.weekday():
                continue

            # "Last <weekday> of the month" patterns.
            if self.wom == self.WOM_LAST:
                if not is_last_xxxday_of_month(d):
                    continue

            # Any other value than LAST/EVERY is the ordinal of the weekday
            # within the month.
            if self.wom not in [self.WOM_LAST, self.WOM_EVERY]:
                nth = int(self.wom)
                if not is_nth_xxxday_of_month(d, nth):
                    continue

            # NOTE(review): this assumes self.hour never equals 12 when
            # self.morning is false -- 12 + 12 = 24 is rejected by datetime().
            # Confirm how noon and midnight are stored.
            am_pm_adjust = 0 if self.morning else 12
            inter_start_dt = tz.localize(
                datetime(d.year, d.month, d.day, self.hour + am_pm_adjust,
                         self.minute))
            inter_start = dt2ts(inter_start_dt)
            # self.duration is multiplied by 3600, i.e. it is expressed in hours.
            inter_end = int(inter_start + self.duration * 3600)
            iset.add(closed(inter_start, inter_end))

        return iset
Esempio n. 7
0
def constraints_unknown_sigma(
    support_directions,
    RHS_offsets,
    LHS_offsets,
    observed_data,
    direction_of_interest,
    RSS,
    RSS_df,
    value_under_null=0.,
    tol=1.e-4,
    DEBUG=False):
    r"""
    Compute the truncation set and observed $T$ statistic for a
    quasi-affine constraint when $\sigma$ is unknown.

    Given a quasi-affine constraint $\{z:Az+u \leq \hat{\sigma}b\}$
    (elementwise)
    specified with $A$ as `support_directions`, $b$ as
    `RHS_offsets` and $u$ as `LHS_offsets`,
    a direction of interest $\eta$, and
    `observed_data` a Gaussian vector $Z \sim N(\mu,\sigma^2 I)$
    with $\sigma$ unknown, this
    function returns the observed studentized value of $\eta^TZ$
    as well as a set
    bounding this value. The value of $\hat{\sigma}$ is taken to be
    sqrt(RSS/RSS_df)

    The interval constructed is such that the endpoints are
    independent of $\eta^TZ$, hence the
    selective $T$ distribution
    of `sample carving`_
    can be used to form an exact pivot.

    To construct the interval, we are in effect conditioning
    on all randomness perpendicular to the direction of interest,
    i.e. $P_{\eta}^{\perp}X$ where $X$ is the Gaussian data vector.

    Notes
    -----

    Covariance is assumed to be an unknown multiple of the identity.

    Parameters
    ----------

    support_directions : np.float
         Matrix specifying constraint, $A$.

    RHS_offsets : np.float
         Offset in constraint, $b$.

    LHS_offsets : np.float
         Offset in LHS of constraint, $u$.

    observed_data : np.float
         Observations.

    direction_of_interest : np.float
         Direction in which we're interested for the
         contrast. It is rescaled to unit norm internally.

    RSS : float
        Residual sum of squares.

    RSS_df : int
        Degrees of freedom of RSS.

    value_under_null : float
        Hypothesized value of the contrast; it is divided by the norm
        of `direction_of_interest` and subtracted from the observed
        contrast.

    tol : float
         Relative tolerance parameter for deciding
         sign of $Az-b$. Currently unused by this function.

    DEBUG : bool
         Currently unused by this function.

    Returns
    -------

    truncation_set : pyinter.IntervalSet
        Intersection, over all constraints, of the closed intervals
        returned by `sqrt_inequality_solver`.

    Tobs : float
        Observed $T$ statistic.

    Raises
    ------

    ValueError
        If there are no constraints, or if the intersection of the
        per-constraint interval sets is empty.

    """

    # shorthand
    A, b, L, X, w, theta = (support_directions, RHS_offsets, LHS_offsets,
                            observed_data, direction_of_interest,
                            value_under_null)

    # make direction of interest a unit vector

    normw = np.linalg.norm(w)
    w = w / normw
    theta = theta / normw

    sigma_hat = np.sqrt(RSS / RSS_df)

    # compute the sufficient statistics

    U = (w * X).sum() - theta
    V = X - (X * w).sum() * w
    W = sigma_hat**2 * RSS_df + U**2
    # W - U**2 equals sigma_hat**2 * RSS_df, so this is U scaled by sigma_hat.
    Tobs = U / np.sqrt((W - U**2) / RSS_df)
    sqrtW = np.sqrt(W)
    alpha = np.dot(A, w)

    gamma = theta * alpha + np.dot(A, V) + L

    # One interval set per constraint row; falsy entries returned by the
    # solver are skipped.
    intervals = []
    for _a, _b, _c in zip(alpha, b, gamma):
        _a = _a * sqrtW
        _b = _b * sqrtW
        cur_intervals = sqrt_inequality_solver(_a, _c, _b, RSS_df)
        intervals.append(
            pyinter.IntervalSet(
                [pyinter.closed(*i) for i in cur_intervals if i]))

    if not intervals:
        raise ValueError("no constraints supplied")

    truncation_set = intervals[0]
    for interv in intervals[1:]:
        truncation_set = truncation_set.intersection(interv)
    if not truncation_set:
        raise ValueError("empty truncation intervals")
    return truncation_set, Tobs
def constraints_unknown_sigma(
    support_directions,
    RHS_offsets,
    LHS_offsets,
    observed_data,
    direction_of_interest,
    RSS,
    RSS_df,
    value_under_null=0.,
    tol=1.e-4,
    DEBUG=False):
    r"""
    Compute the truncation set and observed $T$ statistic for a
    quasi-affine constraint when $\sigma$ is unknown.

    Given a quasi-affine constraint $\{z:Az+u \leq \hat{\sigma}b\}$
    (elementwise)
    specified with $A$ as `support_directions`, $b$ as
    `RHS_offsets` and $u$ as `LHS_offsets`,
    a direction of interest $\eta$, and
    `observed_data` a Gaussian vector $Z \sim N(\mu,\sigma^2 I)$
    with $\sigma$ unknown, this
    function returns the observed studentized value of $\eta^TZ$
    as well as a set
    bounding this value. The value of $\hat{\sigma}$ is taken to be
    sqrt(RSS/RSS_df)

    The interval constructed is such that the endpoints are
    independent of $\eta^TZ$, hence the
    selective $T$ distribution
    of `sample carving`_
    can be used to form an exact pivot.

    To construct the interval, we are in effect conditioning
    on all randomness perpendicular to the direction of interest,
    i.e. $P_{\eta}^{\perp}X$ where $X$ is the Gaussian data vector.

    Notes
    -----

    Covariance is assumed to be an unknown multiple of the identity.

    Parameters
    ----------

    support_directions : np.float
         Matrix specifying constraint, $A$.

    RHS_offsets : np.float
         Offset in constraint, $b$.

    LHS_offsets : np.float
         Offset in LHS of constraint, $u$.

    observed_data : np.float
         Observations.

    direction_of_interest : np.float
         Direction in which we're interested for the
         contrast. It is rescaled to unit norm internally.

    RSS : float
        Residual sum of squares.

    RSS_df : int
        Degrees of freedom of RSS.

    value_under_null : float
        Hypothesized value of the contrast; it is divided by the norm
        of `direction_of_interest` and subtracted from the observed
        contrast.

    tol : float
         Relative tolerance parameter for deciding
         sign of $Az-b$. Currently unused by this function.

    DEBUG : bool
         Currently unused by this function.

    Returns
    -------

    truncation_set : pyinter.IntervalSet
        Intersection, over all constraints, of the closed intervals
        returned by `sqrt_inequality_solver`.

    Tobs : float
        Observed $T$ statistic.

    Raises
    ------

    ValueError
        If there are no constraints, or if the intersection of the
        per-constraint interval sets is empty.

    """

    # shorthand
    A, b, L, X, w, theta = (support_directions,
                            RHS_offsets,
                            LHS_offsets,
                            observed_data,
                            direction_of_interest,
                            value_under_null)

    # make direction of interest a unit vector

    normw = np.linalg.norm(w)
    w = w / normw
    theta = theta / normw

    sigma_hat = np.sqrt(RSS / RSS_df)

    # compute the sufficient statistics

    U = (w*X).sum() - theta
    V = X - (X*w).sum() * w
    W = sigma_hat**2 * RSS_df + U**2
    # W - U**2 equals sigma_hat**2 * RSS_df, so this is U scaled by sigma_hat.
    Tobs = U / np.sqrt((W - U**2) / RSS_df)
    sqrtW = np.sqrt(W)
    alpha = np.dot(A, w)

    gamma = theta * alpha + np.dot(A, V) + L

    # One interval set per constraint row; falsy entries returned by the
    # solver are skipped.
    intervals = []
    for _a, _b, _c in zip(alpha, b, gamma):
        _a = _a * sqrtW
        _b = _b * sqrtW
        cur_intervals = sqrt_inequality_solver(_a, _c, _b, RSS_df)
        intervals.append(pyinter.IntervalSet([pyinter.closed(*i) for i in cur_intervals if i]))

    if not intervals:
        raise ValueError("no constraints supplied")

    truncation_set = intervals[0]
    for interv in intervals[1:]:
        truncation_set = truncation_set.intersection(interv)
    if not truncation_set:
        raise ValueError("empty truncation intervals")
    return truncation_set, Tobs
Esempio n. 9
0
def get_time_intervals(time_points):
    """Build an ``IntervalSet`` of closed intervals from consecutive time points.

    Each pair produced by ``pairwise(time_points)`` becomes one closed
    interval ``[start, end]``.

    :param time_points: iterable of time points handed to ``pairwise``.
    :return: ``pyinter.IntervalSet`` constructed from those closed intervals.
    """
    return pyinter.IntervalSet(
        [pyinter.closed(begin, finish)
         for begin, finish in pairwise(time_points)])