def findStartTime(timeColumnsDict=None, numberPoints=0, timePeriod='D'):
    """Given time columns and an end time, return the start time as a datetime.

    Parameters
    ----------
    timeColumnsDict : dict, optional
        Maps time-column names ('month_id', 'week_id', 'hour_id', 'time_id')
        to their end values.  A single-entry dict with any other key is
        treated as seconds since the Unix epoch.
    numberPoints : int
        Number of periods to step back from the end time.
    timePeriod : str
        Period used only in the single-column (epoch-seconds) case:
        'daily', 'hourly', 'weekly', 'monthly', or 'unknown'/other
        (falls back to the epoch as start time).

    Returns
    -------
    datetime.datetime
        The computed start time.

    Raises
    ------
    ValueError
        If no supported time columns are present (previously this surfaced
        as an UnboundLocalError or, for the None default, a TypeError).
    """
    logger = _getLogger(__name__)
    logger.info("in findStartTime")
    if timeColumnsDict is None:
        # Guard the default: the membership checks below would otherwise
        # raise TypeError on None.
        timeColumnsDict = {}
    if 'month_id' in timeColumnsDict:
        logger.info("found month_id")
        timestamp = _CSTimeToDatetime(month_id=timeColumnsDict['month_id'] -
                                      numberPoints)
    elif 'week_id' in timeColumnsDict:
        logger.info("found week_id")
        timestamp = _CSTimeToDatetime(week_id=timeColumnsDict['week_id'] -
                                      numberPoints)
    elif 'hour_id' in timeColumnsDict and 'time_id' in timeColumnsDict:
        logger.info("found time_id and hour_id")
        timestamp = _CSTimeToDatetime(time_id=timeColumnsDict['time_id'],
                                      hour_id=timeColumnsDict['hour_id'] -
                                      numberPoints)
    elif 'time_id' in timeColumnsDict:
        logger.info("found time_id ")
        timestamp = _CSTimeToDatetime(time_id=timeColumnsDict['time_id'] -
                                      numberPoints)
    elif len(timeColumnsDict) == 1:  # assume it is already a timestamp
        # For a single column it is assumed the value is seconds since epoch.
        secondsSinceEpoch = timeColumnsDict[next(iter(timeColumnsDict))]
        utcDateTime = _datetime.fromtimestamp(secondsSinceEpoch, _timezone.utc)
        time_id = 0
        hour_id = 0
        week_id = 0
        month_id = 0
        # Set the delta field that matches the requested period.
        needDelta = True
        if timePeriod == 'daily':
            time_id = -1 * numberPoints
        elif timePeriod == 'hourly':
            hour_id = -1 * numberPoints
        elif timePeriod == 'monthly':
            month_id = -1 * numberPoints
        elif timePeriod == 'weekly':
            week_id = -1 * numberPoints
        elif timePeriod == 'unknown':
            needDelta = False
            # logger.warn is a deprecated alias; use warning().
            logger.warning("unknown time type defaulting to start time as epoch")
            timestamp = _datetime(1970, 1, 1, 0, 0, 0, 0, _pytz.UTC)
        else:
            needDelta = False
            logger.warning(
                "unsupported time type defaulting to start time as epoch")
            timestamp = _datetime(1970, 1, 1, 0, 0, 0, 0, _pytz.UTC)
        if needDelta:
            # Days/hours go through timedelta; months/weeks need
            # calendar-aware arithmetic via relativedelta.
            timestamp = utcDateTime + _timedelta(
                days=time_id,
                hours=hour_id,
            ) + _relativedelta(months=month_id, weeks=week_id)
    else:
        # Previously fell through and raised UnboundLocalError on return.
        raise ValueError("timeColumnsDict contains no supported time columns")
    return timestamp
Esempio n. 2
0
def localtime(date):
    """Convert a PyEphem date into local time, returning a Python datetime."""
    microseconds = int(round(24 * 60 * 60 * 1000000 * date))
    seconds, microseconds = divmod(microseconds, 1000000)
    seconds -= 2209032000  # difference between epoch 1900 and epoch 1970
    y, m, d, H, M, S, wday, yday, isdst = _localtime(seconds)
    return _datetime(y, m, d, H, M, S, microseconds)
Esempio n. 3
0
    def get_datetimes(self, fmt='datetime'):
        """
        Returns the datetimes of the reports.

        Parameters
        ----------
          fmt      datetime return type:
                     'datetime' - datetime object
                     'string'   - string in YYYY-MM-DD HH:MM format
                     'int'      - integer list in [YYYYMMDD, HHMM] format
                     'unix'     - Unix timestamp
        """
        assert fmt in ('datetime', 'string', 'int', 'unix'), "Invalid format \'%s\'" % fmt

        dts = []
        # Python 3 fix: `xrange` no longer exists (the sibling copy of this
        # method elsewhere in the file already uses `range`).
        for i in range(self.nrep):

            d = _datetime(self.year[i], self.month[i], self.day[i], self.hour[i], self.minute[i])

            if fmt in ('string', 'int'):
                # 'YYYY-MM-DDTHH:MM:SS' -> 'YYYY-MM-DD HH:MM' (drop ':SS').
                d = d.isoformat().replace('T', ' ')
                d = d[:[x.start() for x in _re.finditer(':', d)][-1]]
                if fmt=='int':
                    # Renamed comprehension variable: it shadowed the loop index.
                    d = [ int(tok) for tok in d.replace('-', '').replace(':', '').split() ]
            elif fmt=='unix':
                # timegm interprets the naive struct_time as UTC.
                d = _timegm(d.timetuple())

            dts.append(d)

        return dts
Esempio n. 4
0
    def plot_total_activity(self, start_time, end_time, dt, activity_type='both', return_vals=False):
        """Plot the summed activity histogram across all stations.

        Parameters
        ----------
        start_time, end_time : datetime
            Window over which to plot.
        dt : float
            Bin width in seconds (converted to ms for the time axis).
        activity_type : str
            Forwarded to each station's ``activity_histogram``.
        return_vals : bool
            When True, also return ``(times, time_ax, tot_hist)``.
        """
        start_ms = _datetime_to_tstamp(start_time)
        end_ms = _datetime_to_tstamp(end_time)
        dt_ms = dt * 1000
        time_ax = _np.arange(start_ms, end_ms, dt_ms)

        timefmt = _mdates.DateFormatter('%H:%M')
        fig = _plt.figure()
        ax = fig.add_subplot(111)
        ax.xaxis_date()
        ax.xaxis.set_major_formatter(timefmt)

        tot_hist = _np.zeros_like(time_ax[:-1])
        for s in self.stations:
            # Bug fix: a bare `except:` also swallowed KeyboardInterrupt and
            # SystemExit; catch only real errors and keep the same message.
            try:
                tot_hist += self.stations[s].activity_histogram(time_ax, activity_type=activity_type)
            except Exception as exc:
                print("Getting activity failed for station",
                      "%d with error:\n  %s" %
                      (self.stations[s].station_id, exc))

        # Histogram has one fewer value than bin edges, hence [:-1].
        times = [_tstamp_to_datetime(t) for t in time_ax[:-1]]
        ax.plot(times, tot_hist, c="0.3", lw=2)

        # Dashed vertical line at each midnight for orientation.
        curr_line = _datetime(start_time.year, start_time.month, start_time.day)
        while curr_line < end_time:
            ax.axvline(curr_line, color='0.5', ls='--')
            curr_line += _timedelta(days=1)

        ax.set_xlim(start_time, end_time)
        ax.set_xlabel("Time")
        ax.set_ylabel("Total activity across city")

        if return_vals:
            return times, time_ax, tot_hist
Esempio n. 5
0
def localtime(date):
    """Convert a PyEphem date into local time, returning a Python datetime."""
    microseconds = int(round(24 * 60 * 60 * 1000000 * date))
    seconds, microseconds = divmod(microseconds, 1000000)
    seconds -= 2209032000  # difference between epoch 1900 and epoch 1970
    y, m, d, H, M, S, wday, yday, isdst = _localtime(seconds)
    return _datetime(y, m, d, H, M, S, microseconds)
Esempio n. 6
0
def from_isodatetime_2_timestamp(dtime, hours=0, minutes=0):
    """Convert an ISO datetime string to a Unix timestamp.

    :param dtime: Datetime in ``YYYY-MM-DD HH:MM:SS`` format.
    :param hours: Hours to adjust the timezone.
    :param minutes: Minutes to adjust the timezone.
    :return: Integer Unix timestamp of *dtime*.
    :raises ValueError: If *dtime* does not match the expected format
        (previously this surfaced as an opaque ``AttributeError`` on the
        failed ``search``).
    """
    pattern = _compile(r'(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})')
    match = pattern.search(dtime)
    if match is None:
        raise ValueError(
            "dtime %r does not match 'YYYY-MM-DD HH:MM:SS'" % (dtime,))
    year, month, day, hour, minute, second = (int(g) for g in match.groups())
    # Build an aware datetime in the caller-supplied fixed-offset zone.
    t = _datetime(year=year, month=month, day=day, hour=hour, minute=minute,
                  second=second,
                  tzinfo=_timezone(_timedelta(hours=hours, minutes=minutes)))
    return int(t.timestamp())
Esempio n. 7
0
    def _extract_echo_retval_from_csv(self, response):
        """Parse the echoed datetime fields from a one-row CSV *response*.

        Expects exactly one CSV row of
        (year, month, day, hour, minute, second, microsecond, tz) and
        returns the corresponding timezone-aware ``datetime``.
        """
        strio = _StringIO(response.text)
        reader = _csv.reader(strio)
        # Python 3 fix: csv readers have no `.next()` method; use the
        # builtin `next()` instead.
        year_str, month_str, day_str, hour_str, minute_str, second_str, \
         microsecond_str, tz_str = \
            next(reader)
        # The payload must contain exactly one row.
        with self.assertRaises(StopIteration):
            next(reader)

        year = int(year_str)
        month = int(month_str)
        day = int(day_str)
        # Time-of-day fields may be empty strings; default them to zero.
        hour = int(hour_str or 0)
        minute = int(minute_str or 0)
        second = int(second_str or 0)
        microsecond = int(microsecond_str or 0)

        # Offset looks like "+HHMM" or "-HH"; the minutes part is optional.
        tz_match = _re.match(r'(?P<tz_sign>[-+])(?P<tz_hours>\d\d)'
                              r'(?P<tz_minutes>\d\d)?',
                             tz_str)
        if tz_match:
            tz_sign = tz_match.group('tz_sign')
            tz_hours = int(tz_sign + tz_match.group('tz_hours'))
            tz_minutes = int(tz_sign + (tz_match.group('tz_minutes') or '0'))
            tz_minutes += tz_hours * 60
            tzinfo = _tz.FixedOffset(tz_minutes)
        else:
            # No parsable offset: fall back to UTC.
            tzinfo = _tz.UTC

        return _datetime(year, month, day, hour, minute, second, microsecond,
                         tzinfo)
Esempio n. 8
0
    def __new__(cls, year, month, day, hour=0, minute=0, second=0, microsecond=0, tzinfo=None):
        """Build a datetime, made aware via the default zone when USE_TZ is on."""
        result = _datetime(year, month, day, hour, minute, second, microsecond,
                           tzinfo=tzinfo)
        if tzinfo is None and getattr(settings, "USE_TZ", False):
            result = timezone.make_aware(result, timezone.get_default_timezone())
        return result
Esempio n. 9
0
def get_first_of_following_month(utc_now: _datetime) -> _datetime:
    """Return midnight UTC on the first day of the month after *utc_now*."""
    # divmod rolls December (12) over to January of the next year.
    year_carry, month_index = divmod(utc_now.month, 12)
    return _datetime(utc_now.year + year_carry, month_index + 1, 1,
                     tzinfo=_timezone.utc)
Esempio n. 10
0
def _dec_year(date_num):
    """Convert a 'YYYYMMDD' string into a decimal (fractional) year."""
    parsed = _datetime.strptime(date_num, '%Y%m%d')

    def epoch_seconds(moment):
        # Local-time seconds since the epoch for the given datetime.
        return _time.mktime(moment.timetuple())

    year_start = _datetime(year=parsed.year, month=1, day=1)
    next_year_start = _datetime(year=parsed.year + 1, month=1, day=1)

    elapsed = epoch_seconds(parsed) - epoch_seconds(year_start)
    duration = epoch_seconds(next_year_start) - epoch_seconds(year_start)
    return parsed.year + elapsed / duration
Esempio n. 11
0
def get_next_day(utc_now: _datetime = None) -> _datetime:
    """Return midnight UTC of the day after *utc_now* (defaults to now)."""
    if utc_now is None:
        utc_now = get_utc_now()
    # Truncate to midnight UTC of the current day, then step one day forward.
    midnight = _datetime(utc_now.year,
                         utc_now.month,
                         utc_now.day,
                         tzinfo=_timezone.utc)
    return midnight + ONE_DAY
Esempio n. 12
0
def __add_years(inc, date):
    """Return *date* shifted by *inc* years, clamping the day to the
    target month's length (e.g. Feb 29 -> Feb 28 on non-leap years)."""
    target_year = int(date.year + inc)
    # years don't impact months or days beyond the leap-day clamp.
    last_day = _calendar.monthrange(target_year, date.month)[1]
    return _datetime(year=target_year,
                     month=date.month,
                     day=min(date.day, last_day))
Esempio n. 13
0
def _dict_to_tuple(d):
    '''Convert a regex group dictionary to a time tuple.

    Expected keys (all optional): 'Y', 'M', 'D', 'h', 'm' (integers), 's'
    (float seconds), 'tz' (offset like '+05:30', or 'Z') and 'neg'.
    Depends on key values in the regexp pattern!
    '''
    # TODO: Adding a ms field to struct_time tuples is problematic
    # since they don't have this field.  Should use datetime
    # which has a microseconds field, else no ms..  When mapping struct_time
    # to gDateTime the last 3 fields are irrelevant, here using dummy values to make
    # everything happy.
    #

    # Start from the module-level template so unset fields keep defaults.
    retval = _niltime[:]
    for k,i in ( ('Y', 0), ('M', 1), ('D', 2), ('h', 3), ('m', 4), ):
        v = d.get(k)
        if v: retval[i] = int(v)

    # Split fractional seconds into whole seconds and milliseconds.
    v = d.get('s')
    if v:
        msec,sec = _modf(float(v))
        retval[6],retval[5] = int(round(msec*1000)), int(sec)

    v = d.get('tz')
    if v and v != 'Z':
        h,m = map(int, v.split(':'))
        # check for time zone offset, if within the same timezone,
        # ignore offset specific calculations
        offset=_localtimezone().utcoffset(_datetime.now())
        # NOTE(review): on Python 3 this is true division yielding a float,
        # and for negative UTC offsets timedelta normalizes to days=-1 with
        # a positive .seconds -- confirm this arithmetic is intended.
        local_offset_hour = offset.seconds/3600
        # NOTE(review): (x % 3600) % 60 yields leftover *seconds* mod 60,
        # not minutes; minutes would be (x % 3600) // 60 -- confirm intent.
        local_offset_min = (offset.seconds%3600)%60
        if local_offset_hour > 12:
            local_offset_hour -= 24

        if local_offset_hour != h or local_offset_min != m:
            if h<0:
                #TODO: why is this set to server
                #foff = _fixedoffset(-((abs(h)*60+m)),"server")
                foff = _fixedoffset(-((abs(h)*60+m)))
            else:
                #TODO: why is this set to server
                #foff = _fixedoffset((abs(h)*60+m),"server")
                foff = _fixedoffset((abs(h)*60+m))

            # Build an aware datetime in the parsed zone, convert it to
            # local time, and write the local fields back into retval.
            dt = _datetime(retval[0],retval[1],retval[2],retval[3],retval[4],
                           retval[5],0,foff)

            # update dict with calculated timezone
            localdt=dt.astimezone(_localtimezone())
            retval[0] = localdt.year
            retval[1] = localdt.month
            retval[2] = localdt.day
            retval[3] = localdt.hour
            retval[4] = localdt.minute
            retval[5] = localdt.second

    # 'neg' marks a negative (BC-style) date: negate the Y..m fields.
    if d.get('neg', 0):
        retval[0:5] = map(operator.__neg__, retval[0:5])
    return tuple(retval)
Esempio n. 14
0
def _dict_to_tuple(d):
    '''Convert a regex group dictionary to a time tuple.

    Expected keys (all optional): 'Y', 'M', 'D', 'h', 'm' (integers), 's'
    (float seconds), 'tz' (offset like '+05:30', or 'Z') and 'neg'.
    Depends on key values in the regexp pattern!
    '''
    # TODO: Adding a ms field to struct_time tuples is problematic
    # since they don't have this field.  Should use datetime
    # which has a microseconds field, else no ms..  When mapping struct_time
    # to gDateTime the last 3 fields are irrelevant, here using dummy values to make
    # everything happy.
    #

    # Start from the module-level template so unset fields keep defaults.
    retval = _niltime[:]
    for k,i in ( ('Y', 0), ('M', 1), ('D', 2), ('h', 3), ('m', 4), ):
        v = d.get(k)
        if v: retval[i] = int(v)

    # Split fractional seconds into whole seconds and milliseconds.
    v = d.get('s')
    if v:
        msec,sec = _modf(float(v))
        retval[6],retval[5] = int(round(msec*1000)), int(sec)

    v = d.get('tz')
    if v and v != 'Z':
        h,m = map(int, v.split(':'))
        # check for time zone offset, if within the same timezone,
        # ignore offset specific calculations
        offset=_localtimezone().utcoffset(_datetime.now())
        # NOTE(review): on Python 3 this is true division yielding a float,
        # and for negative UTC offsets timedelta normalizes to days=-1 with
        # a positive .seconds -- confirm this arithmetic is intended.
        local_offset_hour = offset.seconds/3600
        # NOTE(review): (x % 3600) % 60 yields leftover *seconds* mod 60,
        # not minutes; minutes would be (x % 3600) // 60 -- confirm intent.
        local_offset_min = (offset.seconds%3600)%60
        if local_offset_hour > 12:
            local_offset_hour -= 24

        if local_offset_hour != h or local_offset_min != m:
            if h<0:
                #TODO: why is this set to server
                #foff = _fixedoffset(-((abs(h)*60+m)),"server")
                foff = _fixedoffset(-((abs(h)*60+m)))
            else:
                #TODO: why is this set to server
                #foff = _fixedoffset((abs(h)*60+m),"server")
                foff = _fixedoffset((abs(h)*60+m))

            # Build an aware datetime in the parsed zone, convert it to
            # local time, and write the local fields back into retval.
            dt = _datetime(retval[0],retval[1],retval[2],retval[3],retval[4],
                           retval[5],0,foff)

            # update dict with calculated timezone
            localdt=dt.astimezone(_localtimezone())
            retval[0] = localdt.year
            retval[1] = localdt.month
            retval[2] = localdt.day
            retval[3] = localdt.hour
            retval[4] = localdt.minute
            retval[5] = localdt.second

    # 'neg' marks a negative (BC-style) date: negate the Y..m fields.
    if d.get('neg', 0):
        retval[0:5] = map(operator.__neg__, retval[0:5])
    return tuple(retval)
Esempio n. 15
0
def _dict_to_tuple(d):
    '''Convert a regex group dictionary to a time tuple.

    Expected keys (all optional): 'Y', 'M', 'D', 'h', 'm' (integers), 's'
    (float seconds), 'tz' (offset like '+05:30', or 'Z') and 'neg'.
    Depends on key values in the regexp pattern!
    '''
    # Start from the module-level template so unset fields keep defaults.
    retval = _niltime[:]
    for k, i in (
        ('Y', 0),
        ('M', 1),
        ('D', 2),
        ('h', 3),
        ('m', 4),
    ):
        v = d.get(k)
        if v: retval[i] = int(v)

    # Split fractional seconds into whole seconds and milliseconds.
    v = d.get('s')
    if v:
        msec, sec = _modf(float(v))
        retval[6], retval[5] = int(round(msec * 1000)), int(sec)

    v = d.get('tz')
    if v and v != 'Z':
        h, m = map(int, v.split(':'))
        # check for time zone offset, if within the same timezone,
        # ignore offset specific calculations
        offset = _localtimezone().utcoffset(_datetime.now())
        # NOTE(review): on Python 3 this is true division yielding a float,
        # and for negative UTC offsets timedelta normalizes to days=-1 with
        # a positive .seconds -- confirm this arithmetic is intended.
        local_offset_hour = offset.seconds / 3600
        # NOTE(review): (x % 3600) % 60 yields leftover *seconds* mod 60,
        # not minutes; minutes would be (x % 3600) // 60 -- confirm intent.
        local_offset_min = (offset.seconds % 3600) % 60
        if local_offset_hour > 12:
            local_offset_hour -= 24

        if local_offset_hour != h or local_offset_min != m:
            if h < 0:
                #TODO: why is this set to server
                #foff = _fixedoffset(-((abs(h)*60+m)),"server")
                foff = _fixedoffset(-((abs(h) * 60 + m)))
            else:
                #TODO: why is this set to server
                #foff = _fixedoffset((abs(h)*60+m),"server")
                foff = _fixedoffset((abs(h) * 60 + m))

            # Build an aware datetime in the parsed zone, convert it to
            # local time, and write the local fields back into retval.
            dt = _datetime(retval[0], retval[1], retval[2], retval[3],
                           retval[4], retval[5], 0, foff)

            # update dict with calculated timezone
            localdt = dt.astimezone(_localtimezone())
            retval[0] = localdt.year
            retval[1] = localdt.month
            retval[2] = localdt.day
            retval[3] = localdt.hour
            retval[4] = localdt.minute
            retval[5] = localdt.second

    # 'neg' marks a negative (BC-style) date: negate the Y..m fields.
    if d.get('neg', 0):
        retval[0:5] = map(operator.__neg__, retval[0:5])
    return tuple(retval)
Esempio n. 16
0
def dropbox_upload_file(
    source_path: Union[Path, str],
    *,
    destination_path: Union[PurePosixPath, str],
    app_token: str,
    chunk_size: Optional[int] = None,
) -> PurePosixPath:
    """Upload file to Dropbox.

    Args:
        source_path: Path to source file to upload.
        destination_path: POSIX path to file destination, relative to Dropbox
            (app) root folder.
        app_token: Registered app token for API access.
        chunk_size: Size of individual upload chunks within the session. If set
            to None, will be the smaller of file size or 128 MB.

    Returns:
        POSIX path to file in Dropbox (app).
    """
    source_path = Path(source_path)
    destination_path = PurePosixPath(destination_path)
    # Dropbox path requires explicit path from app-root.
    if not bool(destination_path.root):
        destination_path = PurePosixPath("/", destination_path)
    # Stat once; the original called stat() twice (size and mtime).
    stat_result = source_path.stat()
    file_size = stat_result.st_size
    if not chunk_size:
        # file_size + 1 guarantees the single-shot branch for small files.
        chunk_size = min(file_size + 1, 134_217_728)
    commit_kwargs = {
        # dropbox v10.10.0: Convert PurePosixPath to str.
        "path": str(destination_path),
        "mode": dropbox.files.WriteMode("overwrite"),
        "client_modified":
        _datetime(*time.gmtime(stat_result.st_mtime)[:6]),
        "mute": True,
    }
    api = dropbox.Dropbox(oauth2_access_token=app_token)
    # Bug fix: the stream was never closed; `with` guarantees release even
    # if an upload call raises.
    with source_path.open(mode="rb") as stream:
        if file_size <= chunk_size:
            file_meta = api.files_upload(f=stream.read(), **commit_kwargs)
        else:
            # Chunked upload session for files larger than chunk_size.
            session_meta = api.files_upload_session_start(
                f=stream.read(chunk_size))
            cursor = dropbox.files.UploadSessionCursor(
                session_id=session_meta.session_id, offset=stream.tell())
            while (file_size - stream.tell()) > chunk_size:
                api.files_upload_session_append_v2(f=stream.read(chunk_size),
                                                   cursor=cursor)
                cursor.offset = stream.tell()
            file_meta = api.files_upload_session_finish(
                f=stream.read(chunk_size),
                cursor=cursor,
                commit=dropbox.files.CommitInfo(**commit_kwargs),
            )
    return PurePosixPath(file_meta.path_display)
Esempio n. 17
0
def parse_date(s_date):
    """Parse a 'YYYY/MM/DD' string (ignoring trailing spaces) into a datetime.

    Raises:
        IllegalDateException: if the values do not form a valid date.
    """
    # Bug fix: the original stripped trailing spaces with a while loop that
    # raised IndexError on empty or all-space input; rstrip is safe.
    s_date = s_date.rstrip(' ')

    year, month, day = map(int, s_date.split('/'))
    try:
        return _datetime(year, month, day)
    except ValueError:
        raise IllegalDateException('{} is Illegal date'.format(s_date))
Esempio n. 18
0
def parseDatetimetz(string, local=True):
    """Parse *string* into a datetime; tz-aware only when a zone was given."""
    y, mo, d, h, m, s, tz = parse(string, local)
    # Split seconds into the whole part and the fractional (microsecond) part.
    whole_s, frac_s = divmod(s, 1.0)
    zone = tzinfo(_tzoffset(tz, None) / 60) if tz else None
    return _datetime(y, mo, d, int(h), int(m), int(whole_s),
                     int(round(frac_s * 1000000)), zone)
    def download(self, start=None, end=None, output_path=_MP3_OUT_PATH):
        """
        Download mp3 files for the archive entries currently in the `entries`
        list and between the start and end dates.

        Parameters
        ----------
        start : datetime, optional
            The earliest date for which to retrieve files.
        end : datetime, optional
            The latest date for which to retrieve files.
        output_path : str (optional)
            The local path to which archive entry mp3 files will be written.
            The path must exist before calling the method. Defaults to
            '../audio_data/audio_files/mp3_files/'.
        """
        navigator = _DownloadNavigator(login=True,
                                       username=self.username,
                                       password=self._password,
                                       verbose=self._verbose)

        # Open-ended bounds default to the extremes of the datetime range.
        if not start:
            start = _datetime(1, 1, 1, 0, 0)
        if not end:
            end = _datetime(9999, 12, 31, 0, 0)

        if self._verbose:
            print(f'Retrieving list of ArchiveEntries...\n'
                  f' no earlier than {start}\n'
                  f' no later than   {end}')

        # Keep only entries whose end_time falls inside [start, end].
        matched = [entry for entry in self.entries
                   if start <= entry['end_time'] <= end]

        if self._verbose:
            print(f'\n{len(matched)} ArchiveEntries matched.')

        # Hand the filtered list to the navigator for the actual downloads.
        navigator.get_archive_mp3s(matched, output_path)
Esempio n. 20
0
def parseDatetimetz(string, local=True):
    """Parse *string* into a datetime; attach a tz only when one was given.

    Note: unlike the sibling variant, hours/minutes are passed through
    without an int() conversion, matching the original behavior.
    """
    y, mo, d, h, m, s, tz = parse(string, local)
    sec_whole, sec_frac = divmod(s, 1.0)
    zone = tzinfo(_tzoffset(tz, None) / 60) if tz else None
    return _datetime(y, mo, d, h, m, int(sec_whole),
                     int(round(sec_frac * 1000000)), zone)
Esempio n. 21
0
    def __new__(cls, year, month, day, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, is_dst=False):
        """Creates a localized timestamp with the given parameters.

        If tzinfo is omitted, the default time zone will be used; is_dst
        disambiguates wall-clock times that occur twice around DST shifts.
        """
        # Idiom fix: identity check is the correct None comparison; `== None`
        # can invoke arbitrary __eq__ on custom tzinfo objects.
        if tzinfo is None:
            tzinfo = _default_tz

        dt = _datetime(year, month, day, hour, minute, second, microsecond)
        # pytz-style localization attaches the zone with DST disambiguation.
        dt = tzinfo.localize(dt, is_dst=is_dst)
        return _datetime.__new__(
            cls, dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond, dt.tzinfo
        )
Esempio n. 22
0
 def _from_json(self,client,json,**kwargs):
     """Make Task from JSON

     Arguments:
         client {Client} -- The client being used
         json {dict[str,any]} -- The JSON to use

     Returns:
         Task -- The task
     """
     # Merge order lets explicit keyword arguments win over incoming JSON.
     task_json = {**json.copy(), **kwargs}  # Allows to override elements in the JSON

     # NOTE(review): setDate/dueDate are sliced as 'YYYY-MM-DD'
     # (chars 0-3 / 5-6 / 8-); assumes ISO-format date strings -- confirm
     # against the API payload.
     return Task(
         client=client,
         Id=task_json["id"],
         title=task_json["title"],
         setter=_person.from_json(task_json["setter"]),
         student=_person.from_json(task_json["student"]),
         addressees=[_addresse.from_json(addresse) for addresse in task_json["addressees"]],
         set_date=_datetime(
             year=int(task_json["setDate"][:4]),
             month=int(task_json["setDate"][5:7]),
             day=int(task_json["setDate"][8:])
         ),
         due_date=_datetime(
             year=int(task_json["dueDate"][:4]),
             month=int(task_json["dueDate"][5:7]),
             day=int(task_json["dueDate"][8:])
         ),
         mark=_mark.from_json(task_json["mark"]),
         last_marked_as_done_by=_person.from_json(task_json["lastMarkedAsDoneBy"]),
         # Flag fields default to False (or True for `read`) when absent.
         excused=task_json.get("isExcused",False),
         done=task_json.get("isDone",False),
         read=not task_json.get("isUnread",False),
         archived=task_json.get("archived",False),
         resubmission_required=task_json.get("isResubmissionRequired",False),
         file_submission_required=task_json.get("fileResubmissionRequired",False),
         file_submission_enabled=task_json.get("hasFileSubmission",False),
         description_contains_questions=task_json.get("descriptionContainsQuestions",False)
     )
Esempio n. 23
0
    def __new__(cls, year, month, day, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, is_dst=False):
        """Creates a localized timestamp with the given parameters.

        If tzinfo is omitted, the default time zone will be used; is_dst
        disambiguates wall-clock times that occur twice around DST shifts.
        """
        # Idiom fix: identity check is the correct None comparison; `== None`
        # can invoke arbitrary __eq__ on custom tzinfo objects.
        if tzinfo is None:
            tzinfo = _default_tz

        dt = _datetime(year, month, day, hour, minute, second, microsecond)
        # pytz-style localization attaches the zone with DST disambiguation.
        dt = tzinfo.localize(dt, is_dst=is_dst)
        return _datetime.__new__(
            cls, dt.year, dt.month, dt.day, 
            dt.hour, dt.minute, dt.second, 
            dt.microsecond, dt.tzinfo)
Esempio n. 24
0
def parseDatetimetz(string, local=True):
    """
    Parse the given string using :func:`parse` and return a :class:`datetime.datetime` instance.
    """
    y, mo, d, h, m, s, tz = parse(string, local)
    whole_s, frac_s = divmod(s, 1.0)
    micro = int(round(frac_s * 1000000))
    if tz:
        zone = tzinfo(_tzoffset(tz, None) / 60)
    else:
        zone = None
    return _datetime(y, mo, d, int(h), int(m), int(whole_s), micro, zone)
Esempio n. 25
0
def _dict_to_tuple(d):
    '''Convert a regex group dictionary to a time tuple.

    Expected keys (all optional): 'Y', 'M', 'D', 'h', 'm' (integers), 's'
    (float seconds), 'tz' (offset like '+05:30', or 'Z') and 'neg'.
    Depends on key values in the regexp pattern!
    '''
    # Start from the module-level template so unset fields keep defaults.
    retval = _niltime[:]
    for k,i in ( ('Y', 0), ('M', 1), ('D', 2), ('h', 3), ('m', 4), ):
        v = d.get(k)
        if v: retval[i] = int(v)

    # Split fractional seconds into whole seconds and milliseconds.
    v = d.get('s')
    if v:
        msec,sec = _modf(float(v))
        retval[6],retval[5] = int(round(msec*1000)), int(sec)

    v = d.get('tz')
    if v and v != 'Z':
        h,m = map(int, v.split(':'))
        # check for time zone offset, if within the same timezone,
        # ignore offset specific calculations
        offset=_localtimezone().utcoffset(_datetime.now())
        # NOTE(review): on Python 3 this is true division yielding a float,
        # and for negative UTC offsets timedelta normalizes to days=-1 with
        # a positive .seconds -- confirm this arithmetic is intended.
        local_offset_hour = offset.seconds/3600
        # NOTE(review): (x % 3600) % 60 yields leftover *seconds* mod 60,
        # not minutes; minutes would be (x % 3600) // 60 -- confirm intent.
        local_offset_min = (offset.seconds%3600)%60
        if local_offset_hour > 12:
            local_offset_hour -= 24

        if local_offset_hour != h or local_offset_min != m:
            if h<0:
                #TODO: why is this set to server
                #foff = _fixedoffset(-((abs(h)*60+m)),"server")
                foff = _fixedoffset(-((abs(h)*60+m)))
            else:
                #TODO: why is this set to server
                #foff = _fixedoffset((abs(h)*60+m),"server")
                foff = _fixedoffset((abs(h)*60+m))

            # Build an aware datetime in the parsed zone, convert it to
            # local time, and write the local fields back into retval.
            dt = _datetime(retval[0],retval[1],retval[2],retval[3],retval[4],
                           retval[5],0,foff)

            # update dict with calculated timezone
            localdt=dt.astimezone(_localtimezone())
            retval[0] = localdt.year
            retval[1] = localdt.month
            retval[2] = localdt.day
            retval[3] = localdt.hour
            retval[4] = localdt.minute
            retval[5] = localdt.second

    # 'neg' marks a negative (BC-style) date: negate the Y..m fields.
    if d.get('neg', 0):
        retval[0:5] = map(operator.__neg__, retval[0:5])
    return tuple(retval)
Esempio n. 26
0
def __add_months(inc, date):
    """Return *date* shifted by *inc* months (positive or negative),
    clamping the day to the target month's length; the result is a
    date-only datetime (any time-of-day on *date* is dropped)."""
    # if months > 12 then need to adjust the year as well.
    add = date.month - 1 + inc
    # NOTE(review): relies on Python 3 true division plus int() truncating
    # toward zero; spot-checked for negative increments and year rollovers,
    # but worth covering with unit tests -- confirm.
    year = int(date.year + add / 12 )
    # In order to properly handle negative increments, keep adding 12 until the month is > 1
    # jan = 1 , nov = 11 inc = -2
    # (1 - (-2) ) + 12 = 11
    month = ((date.month % 12) + inc) % 12
    # no month can be 0, so 0 = 12.
    if month == 0:
        month = 12

    # Clamp the day to the target month (e.g. Jan 31 + 1 month -> Feb 28/29).
    day = min(date.day,_calendar.monthrange(year,month)[1])
    retdate = _datetime(year=year, month=month, day=day)

    return retdate
Esempio n. 27
0
def naive_datetime_from_timestruct(timestruct):
    """Convert a time struct to a time-zone-naive date-time object

    The input and the result express the same time in the same time zone,
    but that time zone is not specified.  Consequently, the resulting
    :class:`~datetime.datetime` is time-zone-naive.

    :param timestruct:
        A time struct.
    :type timestruct: :class:`time.struct_time`

    :rtype: :class:`datetime.datetime`

    """
    fields = (timestruct.tm_year, timestruct.tm_mon, timestruct.tm_mday,
              timestruct.tm_hour, timestruct.tm_min, timestruct.tm_sec)
    return _datetime(*fields)
Esempio n. 28
0
def parse_fname(fname):
    """Split an image filename into (is_ir, sitename, capture datetime).

    Expected stem: ``<site>[_IR]_<YYYY>_<MM>_<DD>_<HHMMSS>``.
    """
    stem = _os.path.splitext(fname)[0]
    tokens = stem.split("_")

    sitename = tokens[0]
    ir_file = tokens[1] == "IR"

    # IR files carry an extra marker token before the date fields.
    offset = 2 if ir_file else 1
    year, month, day, clock = tokens[offset:]

    hour, minute, second = clock[:2], clock[2:4], clock[4:]
    stamp = _datetime(int(year), int(month), int(day),
                      int(hour), int(minute), int(second))
    return ir_file, sitename, stamp
Esempio n. 29
0
def parse_fname(fname):
    """Decompose a capture filename.

    Returns a tuple ``(ir_file, sitename, timestamp)`` parsed from a stem
    of the form ``<site>[_IR]_<YYYY>_<MM>_<DD>_<HHMMSS>``.
    """
    base, _ext = _os.path.splitext(fname)
    pieces = base.split("_")

    site = pieces[0]
    is_ir = pieces[1] == "IR"

    date_fields = pieces[2:] if is_ir else pieces[1:]
    yy, mm, dd, hhmmss = date_fields

    numbers = [int(v) for v in
               (yy, mm, dd, hhmmss[:2], hhmmss[2:4], hhmmss[4:])]
    return is_ir, site, _datetime(*numbers)
Esempio n. 30
0
def datetime_from_httpstr(string):

    """Convert an HTTP date-time string to a date-time object

    .. seealso:: :rfc:`HTTP/1.1 Full Date <2616#section-3.3.1>`

    :param str string:
        An HTTP date-time string.

    :rtype: :class:`datetime.datetime`

    """

    # HTTP11_FORMAT_RE (defined elsewhere in this module) matches the three
    # RFC 2616 date formats: RFC 1123, RFC 850, and asctime().
    match = HTTP11_FORMAT_RE.match(string)

    if not match:
        raise ValueError('invalid HTTP date-time string {!r}; expecting a'
                          ' string in one of the formats specified in RFC 2616'
                          ' section 3.3.1'
                          .format(string))

    # Try each format's year group in turn; only one can have matched.
    year_str = match.group('rfc1123_year')
    if year_str:
        year = int(year_str)
    else:
        year_str = match.group('rfc850_year_2digit')
        if year_str:
            # CAVEAT: guess the year meant by a two-digit year, as specified by
            #   :rfc:`HTTP/1.1 Tolerant Applications <2616#section-19.3>`
            year = 2000 + int(year_str)
            if year > _datetime.now().year + 50:
                year -= 100
        else:
            year = int(match.group('ctime_year'))

    # _http11_format_re_match_pseudogroup resolves the per-format group
    # variants (defined elsewhere in this module).
    month_abbr = _http11_format_re_match_pseudogroup(match, 'month_abbr')
    month = 1 + RFC5322_MONTH_ABBRS.index(month_abbr)

    day = int(_http11_format_re_match_pseudogroup(match, 'day'))
    hour = int(_http11_format_re_match_pseudogroup(match, 'hour'))
    minute = int(_http11_format_re_match_pseudogroup(match, 'minute'))
    second = int(_http11_format_re_match_pseudogroup(match, 'second'))

    # HTTP dates are always expressed in GMT/UTC (RFC 2616 section 3.3.1).
    return _datetime(year, month, day, hour, minute, second, tzinfo=_tz.UTC)
Esempio n. 31
0
def _fix_timezone(tv, tz_from = "Z", tz_to = None):
    """Convert the time tuple *tv* from zone *tz_from* to zone *tz_to*.

    Fields that are None in *tv* are preserved as None in the result; if
    the hour or minute field is absent the tuple is returned unchanged.
    """
    if None in tv[3:5]: # Hour or minute is absent
        return tv

    # Fix local copy of time tuple
    ltv = list(_fix_none_fields(tv))

    # Guard the year range so the datetime construction below cannot
    # overflow when the zone shift crosses a year boundary.
    if ltv[0] < MINYEAR + 1 or ltv[0] > MAXYEAR - 1:
        return tv # Unable to fix timestamp

    # NOTE(review): _tz_to_tzinfo (defined elsewhere) presumably maps a tz
    # designator like "Z" or an offset to a tzinfo instance -- confirm.
    _tz_from = _tz_to_tzinfo(tz_from)
    _tz_to   = _tz_to_tzinfo(tz_to)

    # Build an aware datetime in tz_from, convert to tz_to, and copy the
    # first six fields (Y, M, D, h, m, s) back into the local copy.
    ltv[:6] = _datetime(*(ltv[:6] + [0, _tz_from])).astimezone(_tz_to).timetuple()[:6]

    # Patch local copy with original values
    for i in range(0, 6):
        if tv[i] is None: ltv[i] = None

    return tuple(ltv)
Esempio n. 32
0
        def __new__(cls,
                    year,
                    month,
                    day,
                    hour=0,
                    minute=0,
                    second=0,
                    microsecond=0,
                    tzinfo=None):
            """Build a datetime; naive inputs are localized to the
            default time zone."""
            value = _datetime(year,
                              month,
                              day,
                              hour,
                              minute,
                              second,
                              microsecond,
                              tzinfo=tzinfo)
            if tzinfo is None:
                value = timezone.make_aware(
                    value, timezone.get_default_timezone())
            return value
Esempio n. 33
0
    def plot_total_empty_docks(self, start_time, end_time, dt, return_vals=False):
        """Plot the citywide count of empty docks over [start_time, end_time].

        Parameters
        ----------
        start_time, end_time : datetime
            Window over which to plot.
        dt : float
            Sample spacing in seconds (converted to ms for the time axis).
        return_vals : bool
            When True, also return ``(times, time_ax, tot_nempty)``.
        """
        start_ms = _datetime_to_tstamp(start_time)
        end_ms = _datetime_to_tstamp(end_time)
        dt_ms = dt * 1000
        time_ax = _np.arange(start_ms, end_ms, dt_ms)

        timefmt = _mdates.DateFormatter('%H:%M')
        fig = _plt.figure()
        ax = fig.add_subplot(111)
        ax.xaxis_date()
        ax.xaxis.set_major_formatter(timefmt)

        tot_nempty = _np.zeros_like(time_ax)
        for s in self.stations:
            # Bug fix: a bare `except:` also swallowed KeyboardInterrupt and
            # SystemExit; catch only real errors and keep the same message.
            try:
                tot_nempty += self.stations[s].ndocks -\
                  self.stations[s].nbikes_timeseries(time_ax)
            except Exception as exc:
                print("Getting total nbikes failed for station",
                      "%d with error:\n  %s" %
                      (self.stations[s].station_id, exc))

        times = [_tstamp_to_datetime(t) for t in time_ax]
        ax.plot(times, tot_nempty, c="0.3", lw=2)

        # Dashed vertical line at each midnight for orientation.
        curr_line = _datetime(start_time.year, start_time.month, start_time.day)
        while curr_line < end_time:
            ax.axvline(curr_line, color='0.5', ls='--')
            curr_line += _timedelta(days=1)

        ax.set_xlim(start_time, end_time)
        ax.set_xlabel("Time")
        ax.set_ylabel("Number of empty docks across city")

        if return_vals:
            return times, time_ax, tot_nempty
Esempio n. 34
0
    def get_datetimes(self, fmt='datetime'):
        """
        Return the report datetimes in the requested representation.

        Args:
            fmt : datetime return type:
                  'datetime' - datetime object
                  'string'   - string in YYYY-MM-DD HH:MM format
                  'int'      - integer list in [YYYYMMDD, HHMM] format
                  'unix'     - Unix timestamp
        Returns:
            list, datetimes of the reports
        """
        assert fmt in ('datetime', 'string', 'int',
                       'unix'), "Invalid format \'%s\'" % fmt

        results = []
        for idx in range(self.nrep):
            stamp = _datetime(self.year[idx], self.month[idx], self.day[idx],
                              self.hour[idx], self.minute[idx])

            if fmt == 'unix':
                results.append(_timegm(stamp.timetuple()))
            elif fmt in ('string', 'int'):
                # 'YYYY-MM-DDTHH:MM:SS' -> 'YYYY-MM-DD HH:MM' (drop ':SS').
                text = stamp.isoformat().replace('T', ' ')
                last_colon = [x.start() for x in _re.finditer(':', text)][-1]
                text = text[:last_colon]
                if fmt == 'int':
                    digits = text.replace('-', '').replace(':', '')
                    results.append([int(tok) for tok in digits.split()])
                else:
                    results.append(text)
            else:
                results.append(stamp)

        return results
Esempio n. 35
0
def datetime(*args, **kwargs):
    """Construct a datetime; make it timezone-aware unless tzinfo was passed.

    Presumably `timezone.make_aware` is Django's helper, which attaches the
    current default timezone -- confirm against the surrounding imports.
    """
    value = _datetime(*args, **kwargs)
    if 'tzinfo' in kwargs:
        return value
    return timezone.make_aware(value)
Esempio n. 36
0
# prerequisites
from datetime import datetime as _datetime

import pytest

from tests.base import patch, Mock

_NOW = _datetime(2015, 10, 21, 7, 28)

# example
from datetime import datetime

from hiku.graph import Graph, Root, Field
from hiku.engine import Engine
from hiku.result import denormalize
from hiku.executors.sync import SyncExecutor
from hiku.readers.graphql import read

# Example graph with one root field; hiku resolves fields via callables
# (here the current wall-clock time as an ISO string).
GRAPH = Graph([
    Root([
        Field('now', None, lambda _: [datetime.now().isoformat()]),
    ]),
])

hiku_engine = Engine(SyncExecutor())


# Patch this module's `datetime` so the field resolver sees the fixed _NOW
# value instead of the real clock.  NOTE(review): the visible body only sets
# up the mock; the query/assert part appears truncated in this chunk.
@patch('{}.datetime'.format(__name__))
def test_query(dt):
    dt.now = Mock(return_value=_NOW)
Esempio n. 37
0
 def set_today(cls, dt):
     """Pin the class-wide 'today' and push it into the patched datetime module.

     NOTE(review): `datetime._datetime` and `datetime.datetime.set_now` are
     not stdlib attributes -- this presumably relies on a monkey-patched
     `datetime` module used by the test harness; confirm before reuse.
     """
     import datetime
     cls._today = dt
     d = dt
     # Truncate to midnight before handing the value to the patched clock.
     dd = datetime._datetime(d.year, d.month, d.day)
     datetime.datetime.set_now(dd)
Esempio n. 38
0
def localtime(date):
    """Convert a PyEphem date into naive local time, returning a Python datetime."""
    seconds, microseconds = _convert_to_seconds_and_microseconds(date)
    broken_down = _localtime(seconds)
    year, month, day, hour, minute, second = broken_down[:6]
    return _datetime(year, month, day, hour, minute, second, microseconds)
Esempio n. 39
0
def datetime_from_re_match(string,
                           re,
                           re_flags=0,
                           year_group='year',
                           month_group='month',
                           day_group='day',
                           hour_group='hour',
                           minute_group='minute',
                           second_group='second',
                           microsecond_group='microsecond',
                           tz_sign_group='tz_sign',
                           tz_hours_group='tz_hours',
                           tz_minutes_group='tz_minutes',
                           default_tz=None):

    r"""
    Parse a date-time from a date-time string according to a regular
    expression

    The given *string* is parsed with the given regular expression *re*,
    extracted to date-time parts using the corresponding named match groups,
    and converted to a :class:`~datetime.datetime`.  These rules apply:

      * The date-time parts are extracted as named :obj:`match groups
        <re.MatchObject.group>`, using the names specified by the
        corresponding *\*_group* arguments.

      * A match is required, but none of the expected parts is required to
        be matched.  If desired, such requirements can be expressed in the
        *re* by making it match only if all desired parts are present.

      * If the year part is omitted, it defaults to :code:`'1970'`.

      * If either of the month or day parts is omitted, it defaults to
        :code:`'1'`.

      * If any time part (hour, minute, second, microsecond) is omitted, it
        defaults to :code:`'0'`.

      * If a time zone is specified:

        * It must contain at least a sign part and an hours part.

        * The sign part must be a positive sign (:code:`'+'` or :code:`''`)
          or a negative sign (:code:`'-'`).

        * The minutes part, if omitted, defaults to :code:`'0'`.

        * The resulting :class:`~datetime.datetime` is time-zone-aware.

        * The resulting time zone is a fixed offset of minutes equal to
          the result of evaluating the arithmetic expression
          :samp:`{tz_sign}({tz_hours} * 60 + {tz_minutes})` after
          substituting the corresponding time zone parts.

      * If a time zone is omitted, it defaults to *default_tz*.  If this is
        null, then the resulting :class:`~datetime.datetime` is
        time-zone-naive.

    :param str string:
        A date-time string.

    :param re:
        A regular expression.
    :type re: ~\ :func:`re.compile`

    :param int re_flags:
        Regular expression flags passed to :func:`re.compile`.

    :param str year_group:
        The name of the *re* match group that captures the year.

    :param str month_group:
        The name of the *re* match group that captures the month.

    :param str day_group:
        The name of the *re* match group that captures the day.

    :param str hour_group:
        The name of the *re* match group that captures the hour.

    :param str minute_group:
        The name of the *re* match group that captures the minute.

    :param str second_group:
        The name of the *re* match group that captures the second.

    :param str microsecond_group:
        The name of the *re* match group that captures the microsecond.

    :param str tz_sign_group:
        The name of the *re* match group that captures the time zone offset
        sign.

    :param str tz_hours_group:
        The name of the *re* match group that captures the time zone offset
        hours.

    :param str tz_minutes_group:
        The name of the *re* match group that captures the time zone offset
        minutes.

    :param default_tz:
        The default time zone.
    :type default_tz: :class:`datetime.tzinfo` or null

    :rtype: :class:`datetime.datetime`

    :raise ValueError:
        If *string* does not match *re*, or a group matches a non-integer /
        non-sign substring.

    """

    # NOTE(review): *re* must define ALL the named groups above (they may be
    # optional), since match.group(name) raises IndexError for unknown names.
    re = _re.compile(re, re_flags)

    match = re.match(string)
    if match:
        dt_args = []

        # Collect the seven datetime components in positional order,
        # substituting each default when its group did not participate.
        for name, default in ((year_group, 1970),
                              (month_group, 1),
                              (day_group, 1),
                              (hour_group, 0),
                              (minute_group, 0),
                              (second_group, 0),
                              (microsecond_group, 0),
                              ):
            value_str = match.group(name)
            if value_str is not None:
                try:
                    value = int(value_str)
                except (TypeError, ValueError):
                    raise ValueError('invalid date-time regex {!r}: group {!r}'
                                      ' matched non-integer string {!r}'
                                      .format(re.pattern, name, value_str))
            else:
                value = default
            dt_args.append(value)

        # A matched sign group (possibly the empty string, meaning '+')
        # signals an explicit fixed offset; hours are then mandatory.
        tz_sign_str = match.group(tz_sign_group)
        if tz_sign_str is not None:
            if tz_sign_str in ('+', ''):
                tz_sign = 1
            elif tz_sign_str == '-':
                tz_sign = -1
            else:
                raise ValueError('invalid date-time regex {!r}: group {!r}'
                                  ' matched non-sign string {!r}'
                                  .format(re.pattern, tz_sign_group,
                                          tz_sign_str))

            tz_hours_str = match.group(tz_hours_group)
            try:
                tz_hours = int(tz_hours_str)
            except (TypeError, ValueError):
                raise ValueError('invalid date-time regex {!r}: group {!r}'
                                  ' matched non-integer string {!r}'
                                  .format(re.pattern, tz_hours_group,
                                          tz_hours_str))

            tz_minutes_str = match.group(tz_minutes_group)
            if tz_minutes_str is not None:
                try:
                    tz_minutes = int(tz_minutes_str)
                except (TypeError, ValueError):
                    raise ValueError('invalid date-time regex {!r}: group {!r}'
                                      ' matched non-integer string {!r}'
                                      .format(re.pattern, tz_minutes_group,
                                              tz_minutes_str))
            else:
                tz_minutes = 0

            # Signed total offset in minutes, e.g. '-05:30' -> -330.
            tz_minutes += tz_hours * 60
            tz_minutes *= tz_sign
            tz = _tz.FixedOffset(tz_minutes)

        else:
            tz = default_tz
        dt_args.append(tz)

        return _datetime(*dt_args)

    else:
        raise ValueError('invalid date-time string {!r}; expecting a string'
                          ' that matches regex {!r}'
                          .format(string, re.pattern))
Esempio n. 40
0
 def now(cls):
     """Return the pinned test time if set, else the patched module's now().

     NOTE(review): `datetime._datetime` is not a stdlib attribute; this
     presumably targets a monkey-patched `datetime` module used by the
     test harness -- confirm.

     Fix: removed the unreachable second `return` (truncate-to-seconds code
     after `return d` could never execute); behavior is unchanged.
     """
     import datetime
     d = cls._now or datetime._datetime.now()
     return d
Esempio n. 41
0
def parseDatetimetz(string):
    """Parse *string* into a timezone-aware datetime via the zone helpers."""
    year, month, day, hour, minute, sec, zone = parse(string)
    whole_sec, frac = divmod(sec, 1.0)
    microsec = round(frac * 1000000)
    # NOTE(review): true division yields float minutes; presumably _tzoffset
    # returns seconds -- confirm tzinfo() accepts a non-integer offset.
    offset_minutes = _tzoffset(zone, None) / 60
    return _datetime(year, month, day, hour, minute, int(whole_sec),
                     int(microsec), tzinfo(offset_minutes))
Esempio n. 42
0
def main():
    import argparse as _argparse
    import json as _json
    import os as _os
    import re as _re
    import shutil as _shutil
    import hashlib as _hashlib
    import functools as _functools
    from collections import defaultdict as  _defaultdict
    from datetime import datetime as _datetime
    from datetime import timedelta as _timedelta
    from pathlib import Path as Path

    try:
        from google_photos_takeout_helper.__version__ import __version__
    except ModuleNotFoundError:
        from __version__ import __version__

    import piexif as _piexif
    from fractions import Fraction  # piexif requires some values to be stored as rationals
    import math
    if _os.name == 'nt':
        import win32_setctime as _windoza_setctime

    parser = _argparse.ArgumentParser(
        prog='Google Photos Takeout Helper',
        usage='google-photos-takeout-helper -i [INPUT TAKEOUT FOLDER] -o [OUTPUT FOLDER]',
        description=
        """This script takes all of your photos from Google Photos takeout, 
        fixes their exif DateTime data (when they were taken) and file creation date,
        and then copies it all to one folder.
        """,
    )
    parser.add_argument('--version', action='version', version=f"%(prog)s {__version__}")
    parser.add_argument(
        '-i', '--input-folder',
        type=str,
        required=True,
        help='Input folder with all stuff from Google Photos takeout zip(s)'
    )
    parser.add_argument(
        '-o', '--output-folder',
        type=str,
        required=False,
        default='ALL_PHOTOS',
        help='Output folders which in all photos will be placed in'
    )
    parser.add_argument(
        '--skip-extras',
        action='store_true',
        help='EXPERIMENTAL: Skips the extra photos like photos that end in "edited" or "EFFECTS".'
    )
    parser.add_argument(
        '--skip-extras-harder',  # Oh yeah, skip my extras harder daddy
        action='store_true',
        help='EXPERIMENTAL: Skips the extra photos like photos like pic(1). Also includes --skip-extras.'
    )
    parser.add_argument(
        "--divide-to-dates",
        action='store_true',
        help="Create folders and subfolders based on the date the photos were taken"
    )
    parser.add_argument(
        '--albums',
        type=str,
        help="EXPERIMENTAL, MAY NOT WORK FOR EVERYONE: What kind of 'albums solution' you would like:\n"
             "'json' - written in a json file\n"
    )
    args = parser.parse_args()

    logger.info('Heeeere we go!')

    PHOTOS_DIR = Path(args.input_folder)
    FIXED_DIR = Path(args.output_folder)

    TAG_DATE_TIME_ORIGINAL = _piexif.ExifIFD.DateTimeOriginal
    TAG_DATE_TIME_DIGITIZED = _piexif.ExifIFD.DateTimeDigitized
    TAG_DATE_TIME = 306
    TAG_PREVIEW_DATE_TIME = 50971

    photo_formats = ['.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tif', '.tiff', '.svg', '.heic']
    video_formats = ['.mp4', '.gif', '.mov', '.webm', '.avi', '.wmv', '.rm', '.mpg', '.mpe', '.mpeg', '.mkv', '.m4v',
                     '.mts', '.m2ts']
    extra_formats = [
        '-edited', '-effects', '-smile', '-mix',  # EN/US
        '-edytowane',  # PL
        # Add more "edited" flags in more languages if you want. They need to be lowercase.
    ]

    # Album Multimap
    album_mmap = _defaultdict(list)

    # Duplicate by full hash multimap
    files_by_full_hash = _defaultdict(list)

    # holds all the renamed files that clashed from their
    rename_map = dict()

    _all_jsons_dict = _defaultdict(dict)

    # Statistics:
    s_removed_duplicates_count = 0
    s_copied_files = 0
    s_cant_insert_exif_files = []  # List of files where inserting exif failed
    s_date_from_folder_files = []  # List of files where date was set from folder name
    s_skipped_extra_files = []  # List of extra files ("-edited" etc) which were skipped
    s_no_json_found = []  # List of files where we couldn't find json
    s_no_date_at_all = []  # List of files where there was absolutely no option to set correct date

    FIXED_DIR.mkdir(parents=True, exist_ok=True)

    def for_all_files_recursive(
      dir: Path,
      file_function=lambda fi: True,
      folder_function=lambda fo: True,
      filter_fun=lambda file: True
    ):
        for file in dir.rglob("*"):
            if file.is_dir():
                folder_function(file)
                continue
            elif file.is_file():
                if filter_fun(file):
                    file_function(file)
            else:
                logger.debug(f'Found something weird... {file}')

    # This is required, because windoza crashes when timestamp is negative
    # https://github.com/joke2k/faker/issues/460#issuecomment-308897287
    # This (dynamic assigning a function) mayyy be a little faster than comparing it every time (?)
    datetime_from_timestamp = (lambda t: _datetime(1970, 1, 1) + _timedelta(seconds=int(t))) \
        if _os.name == 'nt' \
        else _datetime.fromtimestamp
    timestamp_from_datetime = (lambda dt: (dt - _datetime(1970, 1, 1)).total_seconds()) \
        if _os.name == 'nt' \
        else _datetime.timestamp

    def is_photo(file: Path):
        if file.suffix.lower() not in photo_formats:
            return False
        # skips the extra photo file, like edited or effects. They're kinda useless.
        nonlocal s_skipped_extra_files
        if args.skip_extras or args.skip_extras_harder:  # if the file name includes something under the extra_formats, it skips it.
            for extra in extra_formats:
                if extra in file.name.lower():
                    s_skipped_extra_files.append(str(file.resolve()))
                    return False
        if args.skip_extras_harder:
            search = r"\(\d+\)\."  # we leave the period in so it doesn't catch folders.
            if bool(_re.search(search, file.name)):
                # PICT0003(5).jpg -> PICT0003.jpg      The regex would match "(5).", and replace it with a "."
                plain_file = file.with_name(_re.sub(search, '.', str(file)))
                # if the original exists, it will ignore the (1) file, ensuring there is only one copy of each file.
                if plain_file.is_file():
                    s_skipped_extra_files.append(str(file.resolve()))
                    return False
        return True

    def is_video(file: Path):
        if file.suffix.lower() not in video_formats:
            return False
        return True

    def chunk_reader(fobj, chunk_size=1024):
        """ Generator that reads a file in chunks of bytes """
        while True:
            chunk = fobj.read(chunk_size)
            if not chunk:
                return
            yield chunk

    def get_hash(file: Path, first_chunk_only=False, hash_algo=_hashlib.sha1):
        hashobj = hash_algo()
        with open(file, "rb") as f:
            if first_chunk_only:
                hashobj.update(f.read(1024))
            else:
                for chunk in chunk_reader(f):
                    hashobj.update(chunk)
        return hashobj.digest()

    def populate_album_map(path: Path, filter_fun=lambda f: (is_photo(f) or is_video(f))):
        if not path.is_dir():
            raise NotADirectoryError('populate_album_map only handles directories not files')

        meta_file_exists = find_album_meta_json_file(path)
        if meta_file_exists is None or not meta_file_exists.exists():
            return False

        # means that we are processing an album so process
        for file in path.rglob("*"):
            if not (file.is_file() and filter_fun(file)):
                continue
            file_name = file.name
            # If it's not in the output folder
            if not (FIXED_DIR / file.name).is_file():
                full_hash = None
                try:
                    full_hash = get_hash(file, first_chunk_only=False)
                except Exception as e:
                    logger.debug(e)
                    logger.debug(f"populate_album_map - couldn't get hash of {file}")
                if full_hash is not None and full_hash in files_by_full_hash:
                    full_hash_files = files_by_full_hash[full_hash]
                    if len(full_hash_files) != 1:
                        logger.error("full_hash_files list should only be one after duplication removal, bad state")
                        exit(-5)
                        return False
                    file_name = full_hash_files[0].name

            # check rename map in case there was an overlap namechange
            if str(file) in rename_map:
                file_name = rename_map[str(file)].name

            album_mmap[file.parent.name].append(file_name)

    # PART 3: removing duplicates

    # THIS IS PARTLY COPIED FROM STACKOVERFLOW
    # https://stackoverflow.com/questions/748675/finding-duplicate-files-and-removing-them
    #
    # We now use an optimized version linked from tfeldmann
    # https://gist.github.com/tfeldmann/fc875e6630d11f2256e746f67a09c1ae
    #
    # THANK YOU Todor Minakov (https://github.com/tminakov) and Thomas Feldmann (https://github.com/tfeldmann)
    #
    # NOTE: defaultdict(list) is a multimap, all init array handling is done internally 
    # See: https://en.wikipedia.org/wiki/Multimap#Python
    #
    def find_duplicates(path: Path, filter_fun=lambda file: True):
        files_by_size = _defaultdict(list)
        files_by_small_hash = _defaultdict(list)

        for file in path.rglob("*"):
            if file.is_file() and filter_fun(file):
                try:
                    file_size = file.stat().st_size
                except (OSError, FileNotFoundError):
                    # not accessible (permissions, etc) - pass on
                    continue
                files_by_size[file_size].append(file)

        # For all files with the same file size, get their hash on the first 1024 bytes
        logger.info('Calculating small hashes...')
        for file_size, files in _tqdm(files_by_size.items(), unit='files-by-size'):
            if len(files) < 2:
                continue  # this file size is unique, no need to spend cpu cycles on it

            for file in files:
                try:
                    small_hash = get_hash(file, first_chunk_only=True)
                except OSError:
                    # the file access might've changed till the exec point got here
                    continue
                files_by_small_hash[(file_size, small_hash)].append(file)

        # For all files with the hash on the first 1024 bytes, get their hash on the full
        # file - if more than one file is inserted on a hash here they are certinly duplicates
        logger.info('Calculating full hashes...')
        for files in _tqdm(files_by_small_hash.values(), unit='files-by-small-hash'):
            if len(files) < 2:
                # the hash of the first 1k bytes is unique -> skip this file
                continue

            for file in files:
                try:
                    full_hash = get_hash(file, first_chunk_only=False)
                except OSError:
                    # the file access might've changed till the exec point got here
                    continue

                files_by_full_hash[full_hash].append(file)

    # Removes all duplicates in folder
    # ONLY RUN AFTER RUNNING find_duplicates()
    def remove_duplicates():
        nonlocal s_removed_duplicates_count
        # Now we have populated the final multimap of absolute dups, We now can attempt to find the original file
        # and remove all the other duplicates
        for files in _tqdm(files_by_full_hash.values(), unit='duplicates'):
            if len(files) < 2:
                continue  # this file size is unique, no need to spend cpu cycles on it

            s_removed_duplicates_count += len(files) - 1
            for file in files:
                # TODO reconsider which dup we delete these now that we're searching globally?
                if len(files) > 1:
                    file.unlink()
                    files.remove(file)
        return True

    # PART 1: Fixing metadata and date-related stuff

    # Returns json dict
    def find_json_for_file(file: Path):
        parenthesis_regexp = r'\([0-9]+\)'
        parenthesis = _re.findall(parenthesis_regexp, file.name)
        if len(parenthesis) == 1:
            # Fix for files that have as image/video IMG_1234(1).JPG with a json IMG_1234.JPG(1).json
            stripped_filename = _re.sub(parenthesis_regexp, '', file.name)
            potential_json = file.with_name(stripped_filename + parenthesis[0] + '.json')
        else:
            potential_json = file.with_name(file.name + '.json')

        if potential_json.is_file():
            try:
                with open(potential_json, 'r') as f:
                    json_dict = _json.load(f)
                return json_dict
            except:
                raise FileNotFoundError(f"Couldn't find json for file: {file}")

        nonlocal _all_jsons_dict
        # Check if we need to load this folder
        if file.parent not in _all_jsons_dict:
            for json_file in file.parent.rglob("*.json"):
                try:
                    with json_file.open('r') as f:
                        json_dict = _json.load(f)
                        if "title" in json_dict:
                            # We found a JSON file with a proper title, store the file name
                            _all_jsons_dict[file.parent][json_dict["title"]] = json_dict
                except:
                    logger.debug(f"Couldn't open json file {json_file}")

        # Check if we have found the JSON file among all the loaded ones in the folder
        if file.parent in _all_jsons_dict and file.name in _all_jsons_dict[file.parent]:
            # Great we found a valid JSON file in this folder corresponding to this file
            return _all_jsons_dict[file.parent][file.name]
        else:
            nonlocal s_no_json_found
            s_no_json_found.append(str(file.resolve()))
            raise FileNotFoundError(f"Couldn't find json for file: {file}")

    # Returns date in 2019:01:01 23:59:59 format
    def get_date_from_folder_meta(dir: Path):
        file = find_album_meta_json_file(dir)
        if not file:
            logger.debug("Couldn't pull datetime from album meta")
            return None
        try:
            with open(str(file), 'r') as fi:
                album_dict = _json.load(fi)
                # find_album_meta_json_file *should* give us "safe" file
                time = int(album_dict["albumData"]["date"]["timestamp"])
                return datetime_from_timestamp(time).strftime('%Y:%m:%d %H:%M:%S')
        except KeyError:
            logger.error(
                "get_date_from_folder_meta - json doesn't have required stuff "
                "- that probably means that either google f****d us again, or find_album_meta_json_file"
                "is seriously broken"
            )

        return None

    @_functools.lru_cache(maxsize=None)
    def find_album_meta_json_file(dir: Path):
        for file in dir.rglob("*.json"):
            try:
                with open(str(file), 'r') as f:
                    dict = _json.load(f)
                    if "albumData" in dict:
                        return file
            except Exception as e:
                logger.debug(e)
                logger.debug(f"find_album_meta_json_file - Error opening file: {file}")

        return None

    def set_creation_date_from_str(file: Path, str_datetime):
        try:
            # Turns out exif can have different formats - YYYY:MM:DD, YYYY/..., YYYY-... etc
            # God wish that americans won't have something like MM-DD-YYYY
            # The replace ': ' to ':0' fixes issues when it reads the string as 2006:11:09 10:54: 1.
            # It replaces the extra whitespace with a 0 for proper parsing
            str_datetime = str_datetime.replace('-', ':').replace('/', ':').replace('.', ':') \
                               .replace('\\', ':').replace(': ', ':0')[:19]
            timestamp = timestamp_from_datetime(
                _datetime.strptime(
                    str_datetime,
                    '%Y:%m:%d %H:%M:%S'
                )
            )
            _os.utime(file, (timestamp, timestamp))
            if _os.name == 'nt':
                _windoza_setctime.setctime(str(file), timestamp)
        except Exception as e:
            raise ValueError(f"Error setting creation date from string: {str_datetime}")

    def set_creation_date_from_exif(file: Path):
        try:
            # Why do you need to be like that, Piexif...
            exif_dict = _piexif.load(str(file))
        except Exception as e:
            raise IOError("Can't read file's exif!")
        tags = [['0th', TAG_DATE_TIME], ['Exif', TAG_DATE_TIME_ORIGINAL], ['Exif', TAG_DATE_TIME_DIGITIZED]]
        datetime_str = ''
        date_set_success = False
        for tag in tags:
            try:
                datetime_str = exif_dict[tag[0]][tag[1]].decode('UTF-8')
                set_creation_date_from_str(file, datetime_str)
                date_set_success = True
                break
            except KeyError:
                pass  # No such tag - continue searching :/
            except ValueError:
                logger.debug("Wrong date format in exif!")
                logger.debug(datetime_str)
                logger.debug("does not match '%Y:%m:%d %H:%M:%S'")
        if not date_set_success:
            raise IOError('No correct DateTime in given exif')

    def set_file_exif_date(file: Path, creation_date):
        try:
            exif_dict = _piexif.load(str(file))
        except:  # Sorry but Piexif is too unpredictable
            exif_dict = {'0th': {}, 'Exif': {}}

        creation_date = creation_date.encode('UTF-8')
        exif_dict['0th'][TAG_DATE_TIME] = creation_date
        exif_dict['Exif'][TAG_DATE_TIME_ORIGINAL] = creation_date
        exif_dict['Exif'][TAG_DATE_TIME_DIGITIZED] = creation_date

        try:
            _piexif.insert(_piexif.dump(exif_dict), str(file))
        except Exception as e:
            logger.debug("Couldn't insert exif!")
            logger.debug(e)
            nonlocal s_cant_insert_exif_files
            s_cant_insert_exif_files.append(str(file.resolve()))

    def get_date_str_from_json(json):
        return datetime_from_timestamp(
            int(json['photoTakenTime']['timestamp'])
        ).strftime('%Y:%m:%d %H:%M:%S')

    # ========= THIS IS ALL GPS STUFF =========

    def change_to_rational(number):
        """convert a number to rantional
        Keyword arguments: number
        return: tuple like (1, 2), (numerator, denominator)
        """
        f = Fraction(str(number))
        return f.numerator, f.denominator

    # got this here https://github.com/hMatoba/piexifjs/issues/1#issuecomment-260176317
    def degToDmsRational(degFloat):
        min_float = degFloat % 1 * 60
        sec_float = min_float % 1 * 60
        deg = math.floor(degFloat)
        deg_min = math.floor(min_float)
        sec = round(sec_float * 100)

        return [(deg, 1), (deg_min, 1), (sec, 100)]

    def set_file_geo_data(file: Path, json):
        """
        Reads the geoData from google and saves it to the EXIF. This works assuming that the geodata looks like -100.12093, 50.213143. Something like that.

        Written by DalenW.
        :param file:
        :param json:
        :return:
        """

        # prevents crashes
        try:
            exif_dict = _piexif.load(str(file))
        except:
            exif_dict = {'0th': {}, 'Exif': {}}

        # converts a string input into a float. If it fails, it returns 0.0
        def _str_to_float(num):
            if type(num) == str:
                return 0.0
            else:
                return float(num)

        # fallbacks to GeoData Exif if it wasn't set in the photos editor.
        # https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/pull/5#discussion_r531792314
        longitude = _str_to_float(json['geoData']['longitude'])
        latitude = _str_to_float(json['geoData']['latitude'])
        altitude = _str_to_float(json['geoData']['altitude'])

        # Prioritise geoData set from GPhotos editor. If it's blank, fall back to geoDataExif
        if longitude == 0 and latitude == 0:
            longitude = _str_to_float(json['geoDataExif']['longitude'])
            latitude = _str_to_float(json['geoDataExif']['latitude'])
            altitude = _str_to_float(json['geoDataExif']['altitude'])

        # latitude >= 0: North latitude -> "N"
        # latitude < 0: South latitude -> "S"
        # longitude >= 0: East longitude -> "E"
        # longitude < 0: West longitude -> "W"

        if longitude >= 0:
            longitude_ref = 'E'
        else:
            longitude_ref = 'W'
            longitude = longitude * -1

        if latitude >= 0:
            latitude_ref = 'N'
        else:
            latitude_ref = 'S'
            latitude = latitude * -1

        # referenced from https://gist.github.com/c060604/8a51f8999be12fc2be498e9ca56adc72
        gps_ifd = {
            _piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0)
        }

        # skips it if it's empty
        if latitude != 0 or longitude != 0:
            gps_ifd.update({
                _piexif.GPSIFD.GPSLatitudeRef: latitude_ref,
                _piexif.GPSIFD.GPSLatitude: degToDmsRational(latitude),

                _piexif.GPSIFD.GPSLongitudeRef: longitude_ref,
                _piexif.GPSIFD.GPSLongitude: degToDmsRational(longitude)
            })

        if altitude != 0:
            gps_ifd.update({
                _piexif.GPSIFD.GPSAltitudeRef: 1,
                _piexif.GPSIFD.GPSAltitude: change_to_rational(round(altitude))
            })

        gps_exif = {"GPS": gps_ifd}
        exif_dict.update(gps_exif)

        try:
            _piexif.insert(_piexif.dump(exif_dict), str(file))
        except Exception as e:
            logger.debug("Couldn't insert geo exif!")
            # local variable 'new_value' referenced before assignment means that one of the GPS values is incorrect
            logger.debug(e)

    # ============ END OF GPS STUFF ============

    # Fixes ALL metadata, takes just file and dir and figures it out
    def fix_metadata(file: Path):
        # logger.info(file)

        has_nice_date = False
        try:
            set_creation_date_from_exif(file)
            has_nice_date = True
        except (_piexif.InvalidImageDataError, ValueError, IOError) as e:
            logger.debug(e)
            logger.debug(f'No exif for {file}')
        except IOError:
            logger.debug('No creation date found in exif!')

        try:
            google_json = find_json_for_file(file)
            date = get_date_str_from_json(google_json)
            set_file_geo_data(file, google_json)
            set_file_exif_date(file, date)
            set_creation_date_from_str(file, date)
            has_nice_date = True
            return
        except FileNotFoundError as e:
            logger.debug(e)

        if has_nice_date:
            return True

        logger.debug(f'Last option, copying folder meta as date for {file}')
        date = get_date_from_folder_meta(file.parent)
        if date is not None:
            set_file_exif_date(file, date)
            set_creation_date_from_str(file, date)
            nonlocal s_date_from_folder_files
            s_date_from_folder_files.append(str(file.resolve()))
            return True
        else:
            logger.warning(f'There was literally no option to set date on {file}')
            nonlocal s_no_date_at_all
            s_no_date_at_all.append(str(file.resolve()))

        return False

    # PART 2: Copy all photos and videos to target folder

    # Makes a new name like 'photo(1).jpg'
    def new_name_if_exists(file: Path):
        """Return *file* itself if it is free, otherwise the first free
        'stem(i).suffix' variant; collisions are recorded in rename_map."""
        candidate = file
        counter = 1
        while candidate.is_file():
            candidate = file.with_name(f"{file.stem}({counter}){file.suffix}")
            rename_map[str(file)] = candidate
            counter += 1
        return candidate

    def copy_to_target(file: Path):
        if is_photo(file) or is_video(file):
            new_file = new_name_if_exists(FIXED_DIR / file.name)
            _shutil.copy2(file, new_file)
            nonlocal s_copied_files
            s_copied_files += 1
        return True

    def copy_to_target_and_divide(file: Path):
        creation_date = file.stat().st_mtime
        date = datetime_from_timestamp(creation_date)

        new_path = FIXED_DIR / f"{date.year}/{date.month:02}/"
        new_path.mkdir(parents=True, exist_ok=True)

        new_file = new_name_if_exists(new_path / file.name)
        _shutil.copy2(file, new_file)
        nonlocal s_copied_files
        s_copied_files += 1
        return True

    # xD python lambdas are shit - this is only because we can't do 2 commands, so we do them in arguments
    def _walk_with_tqdm(res, bar: _tqdm):
        """Tick the progress bar once and pass *res* straight through —
        lets a single-expression lambda both update the bar and forward
        the wrapped call's result."""
        bar.update()
        return res

    # Count *all* photo and video files - this is hacky, and we should use .rglob altogether instead of is_photo
    # Pass 0: count the input files up front so both progress bars get a total.
    logger.info("Counting how many input files we have ahead...")
    _input_files_count = 0
    for ext in _tqdm(photo_formats + video_formats, unit='formats'):
        _input_files_count += len(list(PHOTOS_DIR.rglob(f'**/*{ext}')))
    logger.info(f'Input files: {_input_files_count}')

    # Pass 1: fix exif/creation-date metadata of every photo/video in place.
    logger.info('=====================')
    logger.info('Fixing files metadata and creation dates...')
    # tqdm progress bar stuff
    _metadata_bar = _tqdm(total=_input_files_count, unit='files')

    for_all_files_recursive(
        dir=PHOTOS_DIR,
        file_function=lambda f: _walk_with_tqdm(fix_metadata(f), _metadata_bar),
        # TODO (probably never, but should): Change this maybe to path.rglob
        filter_fun=lambda f: (is_photo(f) or is_video(f))
    )
    _metadata_bar.close()
    logger.info('=====================')

    # Pass 2: copy everything into FIXED_DIR — either flat, or split into
    # year/month subfolders when --divide-to-dates was given.
    logger.info('=====================')
    _copy_bar = _tqdm(total=_input_files_count, unit='files')
    if args.divide_to_dates:
        logger.info('Creating subfolders and dividing files based on date...')
        for_all_files_recursive(
            dir=PHOTOS_DIR,
            file_function=lambda f: _walk_with_tqdm(copy_to_target_and_divide(f), _copy_bar),
            filter_fun=lambda f: (is_photo(f) or is_video(f))
        )
    else:
        logger.info('Copying all files to one folder...')
        logger.info('(If you want, you can get them organized in folders based on year and month.'
                    ' Run with --divide-to-dates to do this)')
        for_all_files_recursive(
            dir=PHOTOS_DIR,
            file_function=lambda f: _walk_with_tqdm(copy_to_target(f), _copy_bar),
            filter_fun=lambda f: (is_photo(f) or is_video(f))
        )
    _copy_bar.close()
    logger.info('=====================')
    # Pass 3: deduplicate the copies in FIXED_DIR.
    logger.info('=====================')
    logger.info('Finding duplicates...')
    find_duplicates(FIXED_DIR, lambda f: (is_photo(f) or is_video(f)))
    logger.info('Removing duplicates...')
    remove_duplicates()
    logger.info('=====================')
    # Optional: dump the album map collected from folder names to albums.json.
    if args.albums is not None:
        if args.albums.lower() == 'json':
            logger.info('=====================')
            logger.info('Populate json file with albums...')
            logger.info('=====================')
            for_all_files_recursive(
                dir=PHOTOS_DIR,
                folder_function=populate_album_map
            )
            file = PHOTOS_DIR / 'albums.json'
            with open(file, 'w', encoding="utf-8") as outfile:
                _json.dump(album_mmap, outfile)
            logger.info(str(file))

    logger.info('')
    logger.info('DONE! FREEEEEDOOOOM!!!')
    logger.info('')
    # Final report: print statistics and write per-category lists of
    # problematic files next to the input photos.
    logger.info("Final statistics:")
    logger.info(f"Files copied to target folder: {s_copied_files}")
    logger.info(f"Removed duplicates: {s_removed_duplicates_count}")
    logger.info(f"Files for which we couldn't find json: {len(s_no_json_found)}")
    if len(s_no_json_found) > 0:
        with open(PHOTOS_DIR / 'no_json_found.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains list of files for which there was no corresponding .json file found\n")
            f.write("# You might find it useful, but you can safely delete this :)\n")
            f.write("\n".join(s_no_json_found))
            logger.info(f" - you have full list in {f.name}")
    logger.info(f"Files where inserting new exif failed: {len(s_cant_insert_exif_files)}")
    if len(s_cant_insert_exif_files) > 0:
        logger.info("(This is not necessary bad thing - pretty much all videos fail, "
                    "and your photos probably have their original exif already")
        with open(PHOTOS_DIR / 'failed_inserting_exif.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains list of files where setting right exif date failed\n")
            f.write("# You might find it useful, but you can safely delete this :)\n")
            f.write("\n".join(s_cant_insert_exif_files))
            logger.info(f" - you have full list in {f.name}")
    logger.info(f"Files where date was set from name of the folder: {len(s_date_from_folder_files)}")
    if len(s_date_from_folder_files) > 0:
        with open(PHOTOS_DIR / 'date_from_folder_name.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains list of files where date was set from name of the folder\n")
            f.write("# You might find it useful, but you can safely delete this :)\n")
            f.write("\n".join(s_date_from_folder_files))
            logger.info(f" - you have full list in {f.name}")
    if args.skip_extras or args.skip_extras_harder:
        # Remove duplicates: https://www.w3schools.com/python/python_howto_remove_duplicates.asp
        s_skipped_extra_files = list(dict.fromkeys(s_skipped_extra_files))
        logger.info(f"Extra files that were skipped: {len(s_skipped_extra_files)}")
        with open(PHOTOS_DIR / 'skipped_extra_files.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains list of extra files (ending with '-edited' etc) which were skipped because "
                    "you've used either --skip-extras or --skip-extras-harder\n")
            f.write("# You might find it useful, but you can safely delete this :)\n")
            f.write("\n".join(s_skipped_extra_files))
            logger.info(f" - you have full list in {f.name}")
    if len(s_no_date_at_all) > 0:
        logger.info('')
        logger.info(f"!!! There were {len(s_no_date_at_all)} files where there was absolutely no way to set "
                    f"a correct date! They will probably appear at the top of the others, as their 'last modified' "
                    f"value is set to moment of downloading your takeout :/")
        with open(PHOTOS_DIR / 'unsorted.txt', 'w', encoding="utf-8") as f:
            f.write("# This file contains list of files where there was no way to set correct date!\n")
            f.write("# You probably want to set their dates manually - but you can delete this if you want\n")
            f.write("\n".join(s_no_date_at_all))
            logger.info(f" - you have full list in {f.name}")

    logger.info('')
    logger.info('Sooo... what now? You can see README.md for what nice G Photos alternatives I found and recommend')
    logger.info('')
    logger.info('If I helped you, you can consider donating me: https://www.paypal.me/TheLastGimbus')
    logger.info('Have a nice day!')
Esempio n. 43
0
def _db_parse_file_column(bytes_):
    ''' parse the plist in the file column of the Files table of iOS10+ backups

    Returns a _plistvals named tuple with the parsed file metadata.
    Raises PlistParseError when the plist deviates from the layout seen in
    our reference samples, PearBackError when the Flags assumption breaks.
    '''

    def epoch_to_datetime(seconds):
        # NOTE: the previous code sliced _gmtime(...)[0:7], which passed
        # tm_wday into datetime's microsecond argument; [0:6] keeps the
        # microsecond at 0 as intended (the plist stores whole seconds).
        return _datetime(*_gmtime(seconds)[0:6], tzinfo=_UTC)

    # NOTE: using the plistlib parser failed, not sure why
    p = _biplist.readPlistFromString(bytes_)

    # check if we have the same fields as in our reference sample
    if set(p.keys()) != _known_plist_keys:
        raise PlistParseError(
            'something in file column of Files table in Manifest.db changed')

    # we have only seen backups where $version, $archiver and $top have fixed
    # values, so raise exception when we hit some other value
    if p.get('$version') != 100000:
        raise PlistParseError('$version != 100000')
    if p.get('$archiver') != 'NSKeyedArchiver':
        raise PlistParseError('$archiver != NSKeyedArchiver')
    if p.get('$top').get('root').integer != 1:
        raise PlistParseError("$top['root'] != Uid(1)")

    # the interesting data is in the $objects field
    objects = p.get('$objects')
    # first field is expected to be $null
    if objects[0] != '$null':
        raise PlistParseError("$objects[0] != $null")

    # check if we have any new types of fields
    #if len(set(objects[1].keys()) - _known_object_keys) != 0:
    #    raise PlistParseError("$objects[1] fields do not match known fields: {:s}".format(str(objects[1].keys())))

    info = objects[1]

    uid = info.get('UserID')
    # contents modification time
    mtime = epoch_to_datetime(info.get('LastModified'))
    inode = info.get('InodeNumber')
    mode = info.get('Mode')
    # determine filetype and permissions based on mode
    filetype = FileType(mode & 0xE000)
    permissions = oct(mode & 0x1FFF)
    # metadata-change time
    ctime = epoch_to_datetime(info.get('LastStatusChange'))
    gid = info.get('GroupID')
    # birth-time (aka creation time)
    btime = epoch_to_datetime(info.get('Birth'))
    size = info.get('Size')
    # not sure what this is
    protection = info.get('ProtectionClass')

    # apparently since iOS11 the plist includes a field 'Flags' in the plist
    # field as well, but I've only seen value 0 in my backups
    if info.get('Flags', 0) != 0:
        raise PearBackError('assumption on plist flags field broken')

    # the Uid stored in 'RelativePath' seems to point to index in 'objects'
    # where the actual value is stored. The Uid.integer property gives the
    # integer value
    relpath = objects[info.get('RelativePath').integer]

    # something similar for the '$class', which seems to be 'MBFile' always
    class_ = objects[info.get('$class').integer].get('$classname')
    if class_ != 'MBFile':
        raise PlistParseError('assumption broken: $class is not always MBFile')

    # extended attributes are not always present, but if they are, they seem
    # to be pointed to by the UID value in the 'ExtendedAttributes' field. The
    # target item then contains a bplist with key-value pairs under the key
    # 'NS.data'
    if 'ExtendedAttributes' in info:
        extended_attributes = _biplist.readPlistFromString(
            objects[info['ExtendedAttributes'].integer].get('NS.data'))
    else:
        extended_attributes = None

    # target is only available when type is a symlink and its value indicates
    # the index of the property in the $objects field
    if filetype == FileType.Symlink:
        if 'Target' not in info:
            raise PlistParseError('Assumption broken on symlinks')
        linktarget = objects[info['Target'].integer]
    else:
        linktarget = None

    # digest is also not always present, it works similar as above two fields,
    # let's store hex string instead of bytes. Some samples wrap the raw bytes
    # in a dict under 'NS.data', so unwrap first when needed.
    if 'Digest' in info:
        digest = objects[info['Digest'].integer]
        if isinstance(digest, dict):
            digest = digest['NS.data']
        digest = _hexlify(digest).decode()
    else:
        digest = None

    # convert to our named tuple and return object
    return _plistvals(uid, gid, mtime, ctime, btime, inode, mode, filetype,
                      permissions, size, protection, relpath,
                      extended_attributes, linktarget, digest)
Esempio n. 44
0
from datetime import date as _date
from datetime import datetime as _datetime


# ---------- Miscellaneous ----------

# Base address used to build links to pages on the Pixel Starships wiki.
WIKIA_BASE_ADDRESS = 'https://pixelstarships.fandom.com/wiki/'


# ---------- Defaults ----------

# Default number of decimal places used when formatting floats.
DEFAULT_FLOAT_PRECISION: int = 1


# ---------- Formatting / Parsing ----------

# strftime/strptime patterns for timestamps exchanged with the PSS API.
API_DATETIME_FORMAT_ISO: str = '%Y-%m-%dT%H:%M:%S'
API_DATETIME_FORMAT_ISO_DETAILED: str = '%Y-%m-%dT%H:%M:%S.%f'
API_DATETIME_FORMAT_CUSTOM: str = '%d.%m.%y %H:%M'


# ---------- PSS ----------

# 2016-01-06 as date and naive datetime — presumably the game's start/launch
# date (the constant names suggest so); confirm against callers.
PSS_START_DATE: _date = _date(year=2016, month=1, day=6)
PSS_START_DATETIME: _datetime = _datetime(year=2016, month=1, day=6)
Esempio n. 45
0
def _parse_mbdb_entry(mbdb, pos):
    ''' parse a single entry in the mbdb file

    *mbdb* is the raw bytes of the mbdb file, *pos* the offset at which this
    entry starts. Returns a (_file_entry, new_pos) tuple.
    Raises MbdbParseError when an assumption about the format is broken.
    '''

    def read_uint(width):
        # read a big-endian unsigned integer of *width* bytes and advance pos
        nonlocal pos
        value = int.from_bytes(mbdb[pos:pos + width], 'big')
        pos += width
        return value

    def read_timestamp():
        # Convert a 4-byte epoch timestamp to an aware datetime.
        # NOTE: the previous code sliced _gmtime(...)[0:7], which passed
        # tm_wday into datetime's microsecond argument; [0:6] keeps the
        # microsecond at 0 as intended (the mbdb stores whole seconds).
        return _datetime(*_gmtime(read_uint(4))[0:6], tzinfo=_UTC)

    domain, pos = _mbdb_string(mbdb, pos)
    filename, pos = _mbdb_string(mbdb, pos)
    linktarget, pos = _mbdb_string(mbdb, pos)
    # a simple test (N=1, scientific is it not?) shows that what is commonly
    # called 'datahash' in the scripts I used as basis for this, is stored in a
    # value that is called 'digest' in the newer (iOS10+) backups. So, we call
    # this 'digest' here as well.
    digest, pos = _mbdb_string(mbdb, pos)
    # this is commonly called enckey in the scripts that I used as source, but
    # in my backups it is consistently an empty string. So assume that it is,
    # break if it isn't.
    unknown, pos = _mbdb_string(mbdb, pos)
    if unknown != '':
        raise MbdbParseError(
            'assumption broken on empty string in unknown field')

    mode = read_uint(2)
    inode = read_uint(8)
    uid = read_uint(4)
    gid = read_uint(4)
    # some sources that I based this function on had a different
    # order for these timestamps and in addition instead of a
    # btime assumed an atime, which I think is incorrect based on some simple
    # experiments (comparing timestamps on a rooted phone with backup
    # timestamps).
    mtime = read_timestamp()
    ctime = read_timestamp()
    btime = read_timestamp()
    size = read_uint(8)
    # Based on the different values I've encountered in the field that is
    # commonly called 'flags' in the scripts that I've used as source it would
    # seem that this is what is called 'protection' in the newer backups.
    # Perhaps these values represent some enum value of the protection level.
    # So, I've called this field 'protection' in contrast to the other scripts
    # out there.
    protection = read_uint(1)
    numprops = read_uint(1)

    # determine filetype and permissions based on mode
    filetype = FileType(mode & 0xE000)
    permissions = oct(mode & 0x1FFF)

    # properties are name/value string pairs appended to the fixed fields
    extended_attributes = _OD()
    for _ in range(numprops):
        pname, pos = _mbdb_string(mbdb, pos)
        pval, pos = _mbdb_string(mbdb, pos)
        extended_attributes[pname] = pval

    # the fileID was originally stored in a separate mbdx file, but we can also
    # determine this by combining the domain and filepath and calculating sha1
    # hash over it
    fileID = _sha1('{:s}-{:s}'.format(domain,
                                      filename).encode('utf8')).hexdigest()

    return _file_entry(fileID, domain, filename, uid, gid, mtime, ctime, btime,
                       inode, mode, filetype, permissions, size, protection,
                       extended_attributes, linktarget, digest), pos
Esempio n. 46
0
class TestMergeDocuments:
    def test_it_returns_the_first_doc(self, db_session, duplicate_docs):
        """The merge target is the first document of the list."""
        assert merge_documents(db_session, duplicate_docs) == duplicate_docs[0]

    def test_it_deletes_all_but_the_first(self, db_session, duplicate_docs):
        merge_documents(db_session, duplicate_docs)
        db_session.flush()

        merged_away_ids = [duplicate_docs[1].id, duplicate_docs[2].id]
        leftover = (db_session.query(Document)
                    .filter(Document.id.in_(merged_away_ids))
                    .count())
        assert not leftover

    @pytest.mark.parametrize("updated", (None, _datetime(2001, 1, 1)))
    @pytest.mark.parametrize("sub_item", ("document_uris", "meta"))
    def test_it_moves_sub_items_to_the_first(self, db_session, duplicate_docs,
                                             datetime, updated, sub_item):
        # collect the sub-items that live on the documents to be merged away
        moved_items = []
        for doc in duplicate_docs[1:]:
            moved_items.extend(getattr(doc, sub_item))

        master = merge_documents(db_session, duplicate_docs, updated=updated)
        db_session.flush()

        counts = [len(getattr(doc, sub_item)) for doc in duplicate_docs]
        assert counts == [3, 0, 0]

        expected_date = updated if updated else datetime.utcnow.return_value
        for item in moved_items:
            assert item.document == master
            assert item.updated == expected_date

    def test_it_moves_annotations_to_the_first(self, db_session,
                                               duplicate_docs):
        merge_documents(db_session, duplicate_docs)
        db_session.flush()

        for document, expected_count in zip(duplicate_docs, (3, 0, 0)):
            annotation_count = (db_session.query(models.Annotation)
                                .filter_by(document_id=document.id)
                                .count())
            assert annotation_count == expected_count

    def test_it_raises_retryable_error_when_flush_fails(
            self, db_session, duplicate_docs, monkeypatch):
        def failing_flush():
            raise sa.exc.IntegrityError(None, None, None)

        monkeypatch.setattr(db_session, "flush", failing_flush)

        with pytest.raises(ConcurrentUpdateError):
            merge_documents(db_session, duplicate_docs)

    def test_it_logs_when_its_called(self, caplog, db_session, duplicate_docs):
        caplog.set_level(logging.INFO)

        merge_documents(db_session, duplicate_docs)

        expected_record = ("h.models.document._document", 20,
                           "Merging 3 documents")
        assert caplog.record_tuples == [expected_record]

    @pytest.fixture
    def duplicate_docs(self, db_session, factories):
        """Three documents sharing one URI, each with one annotation."""
        uri = "http://example.com/master"

        documents = []
        for _ in range(3):
            meta = factories.DocumentMeta()
            doc_uri = factories.DocumentURI(claimant=meta.claimant, uri=uri)
            documents.append(
                factories.Document(document_uris=[doc_uri], meta=[meta]))

        db_session.flush()

        for doc in documents:
            db_session.add(
                models.Annotation(userid="userid", document_id=doc.id))

        return documents
Esempio n. 47
0
 def _sum_args(self):
     """Build the {'start', 'offset'} argument dict: a +00:30 offset-aware
     start datetime plus a mixed-unit timedelta."""
     start = _datetime(2000, 1, 1, tzinfo=_tz.FixedOffset(30))
     offset = _timedelta(days=399, hours=4, minutes=5, seconds=6,
                         microseconds=7)
     return {'start': start, 'offset': offset}
Esempio n. 48
0
 def _difference_args(self):
     """Build the {'start', 'end'} argument dict: a UTC start and an
     offset-aware (+08:09) end datetime."""
     start = _datetime(2000, 1, 1, tzinfo=_tz.UTC)
     end = _datetime(2001, 2, 3, 4, 5, 6, 7,
                     tzinfo=_tz.FixedOffset(8 * 60 + 9))
     return {'start': start, 'end': end}
Esempio n. 49
0
 def _echo_args(self):
     """Build the {'dt'} argument dict: a single offset-aware (+08:09)
     datetime."""
     moment = _datetime(2001, 2, 3, 4, 5, 6, 7,
                        tzinfo=_tz.FixedOffset(8 * 60 + 9))
     return {'dt': moment}
Esempio n. 50
0
# prerequisites
from datetime import datetime as _datetime

from tests.base import patch, Mock

_NOW = _datetime(2015, 10, 21, 7, 28)

# example
from datetime import datetime

from hiku.graph import Graph, Root, Field
from hiku.engine import Engine
from hiku.result import denormalize
from hiku.executors.sync import SyncExecutor
from hiku.readers.graphql import read

# Single-root graph exposing one field, 'now', whose resolver returns the
# current time as an ISO-8601 string (list-wrapped, one value per entity).
GRAPH = Graph([
    Root([
        Field('now', None, lambda _: [datetime.now().isoformat()]),
    ]),
])

# Engine that resolves queries synchronously.
hiku_engine = Engine(SyncExecutor())

# Patch this module's 'datetime' name so the resolver sees a fixed clock.
@patch('{}.datetime'.format(__name__))
def test(dt):
    # freeze time: datetime.now() inside GRAPH's resolver now returns _NOW
    dt.now = Mock(return_value=_NOW)

    query = read('{ now }')
    result = hiku_engine.execute(GRAPH, query)
    # NOTE(review): the denormalized result is never asserted — presumably
    # simple_result == {'now': _NOW.isoformat()}; confirm and add an assert.
    simple_result = denormalize(GRAPH, result, query)
Esempio n. 51
0
# construct a date from a Unix timestamp
_date.fromtimestamp(_time.time())  # 2017-03-23 <class 'datetime.date'>
#
# today's date
_date.today()  # 2017-03-23 <class 'datetime.date'>
#
#
#
#
#
#
#

from datetime import datetime as _datetime

# construct a datetime from explicit components
_datetime(2017, 3, 23, 18, 23, 19) # 2017-03-23 18:23:19

# current local datetime, and ways to format or split it
now = _datetime.now() # 2017-03-24 17:28:10.846293
now.strftime('%Y-%m-%d %H:%M:%S') # 2017-03-24 17:28:10
now.time() # 17:28:10.846293
now.date() # 2017-03-24
_datetime.today() # 2017-03-24 17:28:10.846366
#
#
#
#
#
#
from datetime import timedelta as _timedelta

d01 = _datetime(2017, 1, 1)
Esempio n. 52
0
File: compat.py Progetto: vhata/ibid
 def dt_strptime(date_string, format):
     return _datetime(*(_time.strptime(date_string, format)[:6]))
Esempio n. 53
0
def localtime(date):
    """Convert a PyEphem date into naive local time, returning a Python datetime."""
    seconds, microseconds = _convert_to_seconds_and_microseconds(date)
    # first six struct_time fields are year, month, day, hour, minute, second
    time_fields = _localtime(seconds)[:6]
    return _datetime(*time_fields, microseconds)