Example #1
0
    def validate_result(self, result):
        """Validate the ``result`` object of a statement.

        Confirms that ``result`` is a dict containing only allowed fields and
        then checks every optional field that is present. Each problem found
        is reported through ``self.return_error``.

        :param result: the result object to validate.
        """
        # Ensure incoming result is dict and check allowed fields
        self.check_if_dict(result, "Result")
        self.check_allowed_fields(result_allowed_fields, result, "Result")

        # If duration included, ensure valid duration can be parsed from it
        if "duration" in result:
            try:
                parse_duration(result["duration"])
            except (ISO8601Error, TypeError) as e:
                # A non-string duration (e.g. a number) makes the parser raise
                # TypeError rather than ISO8601Error; report it as a
                # validation error instead of letting it escape.
                self.return_error("Error with result duration - %s" % e.message)

        # If success or completion included, ensure they are boolean
        if "success" in result and not isinstance(result["success"], bool):
            self.return_error("Result success must be a boolean value")
        if "completion" in result and not isinstance(result["completion"], bool):
            self.return_error("Result completion must be a boolean value")

        # If response in result, ensure it is a string
        if "response" in result and not isinstance(result["response"], basestring):
            self.return_error("Result response must be a string")

        # If extensions, validate
        if "extensions" in result:
            self.validate_extensions(result["extensions"], "result")

        # If score included, validate it
        if "score" in result:
            self.validate_score(result["score"])
Example #2
0
    def validate_result(self, result):
        """Check that a statement's ``result`` object is well formed.

        The result must be a dict limited to the allowed fields. Optional
        fields are validated only when present; every violation is passed to
        ``self.return_error``.

        :param result: the result object to validate.
        """
        # Ensure incoming result is dict and check allowed fields
        self.check_if_dict(result, "Result")
        self.check_allowed_fields(result_allowed_fields, result, "Result")

        # If duration included, ensure valid duration can be parsed from it
        if 'duration' in result:
            try:
                parse_duration(result['duration'])
            except (ISO8601Error, TypeError) as e:
                # TypeError covers non-string durations, which the parser
                # rejects before it ever attempts ISO 8601 matching.
                self.return_error("Error with result duration - %s" %
                                  e.message)

        # If success or completion included, ensure they are boolean
        for flag in ('success', 'completion'):
            if flag in result and not isinstance(result[flag], bool):
                self.return_error("Result %s must be a boolean value" % flag)

        # If response in result, ensure it is a string
        if 'response' in result:
            if not isinstance(result['response'], basestring):
                self.return_error("Result response must be a string")

        # If extensions, validate
        if 'extensions' in result:
            self.validate_extensions(result['extensions'], 'result')

        # If score included, validate it
        if 'score' in result:
            self.validate_score(result['score'])
Example #3
0
	def validate_result(self, result):
		"""Validate a statement ``result`` dict field by field.

		Structural checks run first (dict type, allowed fields); after that
		each optional field is inspected only if it is present. Failures are
		handed to ``self.return_error``.
		"""
		# Structural checks: must be a dict holding only allowed fields
		self.check_if_dict(result, "Result")
		self.check_allowed_fields(result_allowed_fields, result, "Result")

		# A supplied duration has to be parseable
		if 'duration' in result:
			try:
				parse_duration(result['duration'])
			except Exception as err:
				self.return_error("Error with result duration - %s" % err.message)

		# success / completion, when supplied, have to be real booleans
		boolean_fields = (
			('success', "Result success must be a boolean value"),
			('completion', "Result completion must be a boolean value"),
		)
		for field_name, complaint in boolean_fields:
			if field_name in result and not isinstance(result[field_name], bool):
				self.return_error(complaint)

		# response, when supplied, has to be a string
		if 'response' in result and not isinstance(result['response'], basestring):
			self.return_error("Result response must be a string")

		# Delegate the nested structures to their own validators
		if 'extensions' in result:
			self.validate_extensions(result['extensions'], 'result')

		if 'score' in result:
			self.validate_score(result['score'])
Example #4
0
    def populateResult(self, stmt_data, verb):
        """Validate the statement's result and hand it to ``saveResultToDB``.

        Extensions are split off from the other result fields, the result is
        checked against the verb, the duration (if any) is parsed to confirm
        its format, and a score (if any) is validated and saved before the
        result itself is saved.

        :param stmt_data: statement data; its 'result' and 'object' entries are read.
        :param verb: verb passed through to ``validateVerbResult``.
        :return: whatever ``saveResultToDB`` returns.
        :raises exceptions.ParamError: if the duration cannot be parsed.
        """
        log_message(self.log_dict, "Populating result", __name__, self.populateResult.__name__)

        # Separate extensions from the remaining result fields
        raw_result = stmt_data['result']
        if 'extensions' in raw_result:
            resultExts = raw_result['extensions']
            result = dict((name, val) for name, val in raw_result.items() if name != 'extensions')
        else:
            resultExts = {}
            # No copy here: later score updates write into stmt_data['result']
            result = raw_result

        # Catch results that contradict the verb
        self.validateVerbResult(result, verb, stmt_data['object'])

        # Validate duration, throw error if duration is not formatted correctly
        if 'duration' in result:
            try:
                parse_duration(result['duration'])
            except ISO8601Error as err:
                problem = err.message
                log_message(self.log_dict, problem, __name__, self.populateResult.__name__, True)
                update_parent_log_status(self.log_dict, 400)
                raise exceptions.ParamError(problem)

        # Results are valid against the verb: check the score object and save it
        if 'score' in result:
            checked_score = self.validateScoreResult(result['score'])
            result['score'] = checked_score
            result['score'] = self.saveScoreToDB(checked_score)

        # Save result
        return self.saveResultToDB(result, resultExts)
Example #5
0
    def populateResult(self, stmt_data):
        """Check the statement's result and pass it on to ``saveResultToDB``.

        Extensions are pulled out of the result, the duration (if present) is
        parsed to confirm its format, and a score (if present) is validated
        and saved before the result itself is saved.

        :param stmt_data: statement data; its 'result' entry is read.
        :return: whatever ``saveResultToDB`` returns.
        :raises exceptions.ParamError: if the duration cannot be parsed.
        """
        log_message(self.log_dict, "Populating result", __name__, self.populateResult.__name__)

        source = stmt_data['result']
        if 'extensions' in source:
            # Work on a copy so the extensions can be removed from it
            result = dict(source)
            resultExts = result.pop('extensions')
        else:
            result = source
            resultExts = {}

        # Validate duration, throw error if duration is not formatted correctly
        if 'duration' in result:
            try:
                parse_duration(result['duration'])
            except ISO8601Error as parse_err:
                text = parse_err.message
                log_message(self.log_dict, text, __name__, self.populateResult.__name__, True)
                update_parent_log_status(self.log_dict, 400)
                raise exceptions.ParamError(text)

        # Validate the score, then replace it with what saveScoreToDB returns
        if 'score' in result:
            result['score'] = self.validateScoreResult(result['score'])
            result['score'] = self.saveScoreToDB(result['score'])

        # Save result
        return self.saveResultToDB(result, resultExts)
Example #6
0
def isoduration_to_seconds(d):
    #=============================
    """
    Convert an ISO duration to a number of seconds.

    :param d: A string representing a duration, formatted as ISO 8601.
      A bare number (or numeric string) is also accepted and is
      interpreted as a count of seconds.
    :return: The number of seconds, or 0 if ``d`` cannot be interpreted
      either as a duration or as a number.
    :rtype: float
    """
    try:
        td = isoduration.parse_duration(d)
        return td.days * 86400 + td.seconds + td.microseconds / 1000000.0
    except Exception:
        # Deliberately best-effort: any parsing failure falls through to the
        # numeric interpretation. `Exception` (rather than a bare except)
        # keeps KeyboardInterrupt and SystemExit propagating.
        try:
            return float(d)  ## Virtuoso strips "PT" etc on import...
        except (TypeError, ValueError, OverflowError):
            return 0
Example #7
0
def isoduration_to_seconds(d):
    # =============================
    """
    Convert an ISO duration to a number of seconds.

    The value is first parsed as an ISO 8601 duration. If that fails it is
    read as a plain number of seconds, and if that fails too the result is 0.

    :param d: A string representing a duration, formatted as ISO 8601
      (a bare numeric value is tolerated as a fallback).
    :return: The number of seconds (0 when ``d`` is not understood).
    :rtype: float
    """
    try:
        td = isoduration.parse_duration(d)
        return td.days * 86400 + td.seconds + td.microseconds / 1000000.0
    except Exception:
        # Best-effort by design; only ordinary errors are absorbed so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        pass

    try:
        return float(d)  ## Virtuoso strips "PT" etc on import...
    except (TypeError, ValueError, OverflowError):
        return 0
Example #8
0
    def scrape(self, video_id, fileName):
        """Store a video's metadata and parsed captions, then index each caption line.

        Metadata comes from the YouTube API. Captions are either downloaded
        from YouTube (Japanese track) or read from a local transcript file.
        The parsed captions are inserted into the ``nekotube`` Mongo database
        and each caption line is indexed into the ``nekotube`` Elasticsearch
        index.

        :param video_id: id of the YouTube video to process.
        :param fileName: path of a UTF-8 transcript file. When empty (or
            None) the captions are downloaded from YouTube instead.
        :raises ValueError: if the video is not found, its duration cannot be
            parsed, or no (Japanese) captions are available.
        """
        mongo_client = MongoClient()
        db = mongo_client['nekotube']
        es = Elasticsearch()

        # Loads metadata
        metadata = self.youtube.videos().list(part='snippet,contentDetails',
                                              id=video_id).execute()

        try:
            snippet = metadata['items'][0]['snippet']
        except (KeyError, IndexError, TypeError):
            raise ValueError('Video not found.')

        try:
            duration = parse_duration(metadata['items'][0]['contentDetails']
                                      ['duration']).total_seconds()
        except Exception as err:
            # Any lookup or parse failure means the same thing to the caller;
            # unlike a bare except this lets KeyboardInterrupt/SystemExit through.
            raise ValueError('Could not parse duration') from err

        # Loads captions for specified YouTube video id
        if not fileName:
            results = self.youtube.captions().list(part='snippet',
                                                   videoId=video_id).execute()

            if 'items' not in results:
                raise ValueError('no captions in video ' + video_id)

            # Locate captions resource (no early exit: the last Japanese
            # track in the listing is the one used)
            captions_resource = None
            for item in results['items']:
                if item['snippet']['language'] == 'ja':
                    captions_resource = item

            if captions_resource is None:
                raise ValueError('no Japanese captions in video ' + video_id)

            # Download raw captions file from youtube
            captions = self.youtube.captions().download(
                id=captions_resource['id']).execute().decode('utf-8')
        else:
            # Transcript file was provided so skip the YouTube captions
            # download; `with` closes the file even if reading fails
            with open(fileName, encoding='utf-8') as f:
                captions = f.read()

        # Tokenizes the Japanese captions and translates Kanji into Hiragana
        parser = CaptionParser(captions)
        parsed_captions = parser.parse()

        # Create record in Mongo
        result = db.videos.insert_one({
            'youtubeVideoId': video_id,
            'title': snippet['title'],
            'thumbnails': snippet['thumbnails'],
            'captionData': parsed_captions,
            'duration': duration
        })
        print('Video inserted. id: {}'.format(result.inserted_id))

        # Index each caption line
        for line_index, line in enumerate(parsed_captions):
            doc = {
                'youtubeVideoId': video_id,
                'refId': str(result.inserted_id),
                'chunkIndex': line_index,
                'original': line['original'],
                'inverted': line['inverted']
            }
            res = es.index(index='nekotube', doc_type='caption_line', body=doc)
            print(res)

        print(json.dumps(parsed_captions))
Example #9
0
def parse_interval(interval_string):
    """Parse an ISO 8601 interval string into an ``Interval``.

    The string is split on ``SEGMENT_DELIM`` into two or three segments. With
    three segments the first is a repeat notation (``Rn``). The remaining two
    segments must be one of:

    1. start/end      - start is a full datetime; end is a datetime, date or time
    2. start/duration - start is a full datetime; duration is a valid duration
    3. duration/end   - duration is a valid duration; end is a full datetime

    Progress messages and the resulting interval are printed, as before.

    :param interval_string: the interval text to parse.
    :return: the ``Interval`` built from the parsed start, end, duration and
        repeat count.
    :raises TypeError: if ``interval_string`` is not a string.
    :raises ISO8601Error: if the segment count is wrong, a segment is empty,
        or a segment cannot be parsed in its expected role.
    """
    if not isinstance(interval_string, string_types):
        raise TypeError("Expecting a string")

    segment_count = interval_string.count(SEGMENT_DELIM)
    if segment_count < 1 or segment_count > 2:
        raise ISO8601Error(
            "Improper number of interval string segments. Must have 1 or 2")

    segments = interval_string.split(SEGMENT_DELIM)
    for idx, seg in enumerate(segments):
        if not seg:
            # Raise (not return) so an empty segment is reported as an error
            raise ISO8601Error("Interval segment index %s was empty" % idx)

    count = None
    if len(segments) == 3:
        # Rn/start/end
        # Rn/start/duration
        # Rn/duration/end
        match = ISO8601_REPEAT_REGEX.match(segments[0])
        if not match:
            raise ISO8601Error("Repeat notation did not match expected")
        count = match.groupdict().get("count", None)
        # A missing or empty count is left untouched instead of crashing
        if count:
            count = int(count)
        segments = segments[1:]

    s0 = segments[0]
    s1 = segments[1]

    start = None
    end = None
    duration = None

    # First remaining term: a fully specified datetime or a duration
    try:
        start = parse_datetime(s0)
        print("second to last term is a datetime")
    except Exception:
        try:
            duration = parse_duration(s0)
            print("second to last term is a duration")
        except Exception:
            raise ISO8601Error(
                "First term after repeat must be either " +
                "a fully specified datetime or a valid duration")

    # Last term. Parsing is delegated to the existing parsers, tried from the
    # most specific format to the least, to avoid duplicating their regexes.
    if start is not None:
        # last term must be a duration, date, time or datetime
        end_parsers = (
            ("datetime", parse_datetime),
            ("date", parse_date),
            ("time", parse_time),
        )
        for label, parser in end_parsers:
            try:
                end = parser(s1)
            except Exception:
                continue
            print("last term is a %s" % label)
            break
        else:
            try:
                duration = parse_duration(s1)
                print("last term is a duration")
            except Exception:
                raise ISO8601Error(
                    "When first term after repeat is a datetime, " +
                    "last term must be either a duration, datetime, date, or time"
                )
    elif duration is not None:
        # last term must be the end datetime; the explicit None test keeps a
        # zero-length (falsy) duration from skipping this validation
        try:
            end = parse_datetime(s1)
        except Exception:
            raise ISO8601Error("If first term after repeat is a duration, " +
                               "last term must be a datetime")

    interval = Interval(start=start, end=end, duration=duration, repeat=count)
    print(interval)
    return interval
Example #10
0
def duration_from_iso(iso_duration: str) -> Duration:
    """
    Builds a :class:`pendulum.Duration` from an ISO-8601 duration string.

    Raises:

        - :exc:`isodate.isoerror.ISO8601Error` for bad input
        - :exc:`ValueError` if the input had non-integer year or month values

    Notes:

    - ISO-8601 durations look like ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``; see
      https://en.wikipedia.org/wiki/ISO_8601#Durations.

    - The extreme values ``pendulum.Duration.min`` and
      ``pendulum.Duration.max`` are ``Duration(weeks=-142857142, days=-5)``
      and ``Duration(weeks=142857142, days=6)`` respectively.

    - ``isodate`` accepts negative durations written as ``-P<something>``
      (e.g. ``-PT5S`` for "minus 5 seconds") but rejects forms such as
      ``PT-5S``.

    - Whether ISO-8601 itself permits negative durations is unclear;
      https://github.com/moment/moment/issues/2408 suggests it does not, yet
      many implementations (``isodate`` included, to a limited extent)
      support the idea.

    .. code-block:: python

        from pendulum import DateTime
        from cardinal_pythonlib.datetimefunc import duration_from_iso
        from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger
        main_only_quicksetup_rootlogger()

        d1 = duration_from_iso("P5W")
        d2 = duration_from_iso("P3Y1DT3H1M2S")
        d3 = duration_from_iso("P7000D")
        d4 = duration_from_iso("P1Y7000D")
        d5 = duration_from_iso("PT10053.22S")
        d6 = duration_from_iso("PT-10053.22S")  # raises ISO8601 error
        d7 = duration_from_iso("-PT5S")
        d8 = duration_from_iso("PT-5S")  # raises ISO8601 error
        now = DateTime.now()
        print(now)
        print(now + d1)
        print(now + d2)
        print(now + d3)
        print(now + d4)

    """
    parsed = parse_duration(iso_duration)  # type: Union[datetime.timedelta, IsodateDuration]  # noqa

    # Dispatch on the two types the parser is expected to produce
    if isinstance(parsed, datetime.timedelta):
        return pendulum_duration_from_timedelta(parsed)
    if isinstance(parsed, IsodateDuration):
        return pendulum_duration_from_isodate_duration(parsed)

    # Any other type is treated as a parser bug
    raise AssertionError(
        f"Bug in isodate.parse_duration, which returned unknown duration "
        f"type: {parsed!r}")