Beispiel #1
0
def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
    """
    Creates xml for transcripts.
    For each transcript element, an associated transcript file is also created in course OLX.

    Arguments:
        video_id (str): Video id of the video.
        video_el (Element): lxml Element object
        static_dir (str): The Directory to store transcript file.
        resource_fs (SubFS): The file system to store transcripts.

    Returns:
        dict with two keys: `xml` (the `video_el` passed in, with a `transcripts`
        child added when the video has transcripts) and `transcripts` (a mapping
        of language code to the exported transcript file name).
    """
    video_transcripts = VideoTranscript.objects.filter(
        video__edx_video_id=video_id).order_by('language_code')
    # create transcripts node only when we have transcripts for a video
    if video_transcripts.exists():
        transcripts_el = SubElement(video_el, 'transcripts')

    # Create static directory based on the file system's subdirectory,
    # falling back to default path in case of an error
    try:
        # File system should not start from /draft directory.
        static_file_dir = combine(
            resource_fs._sub_dir.split('/')[1], static_dir)  # pylint: disable=protected-access
    except (AttributeError, IndexError, KeyError):
        # Indexing the split path raises IndexError when there is no second
        # segment, and a file system without `_sub_dir` raises AttributeError;
        # both must reach the fallback below instead of aborting the export.
        logger.exception(
            "VAL Transcript Export: Error creating static directory path for video {} in file system {}"
            .format(video_id, resource_fs))
        static_file_dir = combine('course', static_dir)

    transcript_files_map = {}
    for video_transcript in video_transcripts:
        language_code = video_transcript.language_code
        file_format = video_transcript.file_format

        try:
            transcript_filename = create_transcript_file(
                video_id=video_id,
                language_code=language_code,
                file_format=file_format,
                resource_fs=resource_fs.delegate_fs(),
                static_dir=static_file_dir)
            transcript_files_map[language_code] = transcript_filename
        except TranscriptsGenerationException:
            # we don't want to halt export in this case, just log and move to the next transcript.
            logger.exception(
                '[VAL] Error while generating "%s" transcript for video["%s"].',
                language_code, video_id)
            continue

        # The element always advertises SRT, regardless of the stored file format.
        SubElement(
            transcripts_el, 'transcript', {
                'language_code': language_code,
                'file_format': Transcript.SRT,
                'provider': video_transcript.provider,
            })

    return dict(xml=video_el, transcripts=transcript_files_map)
 def FindFiles(self, FileName, FillFindData, DokanFileInfo):
     """Report every entry of a directory through the FillFindData callback.

     The Dokan path is converted with `_dokanpath2pyfs`, entries flagged by
     `_is_pending_delete` are skipped, and STATUS_SUCCESS is returned.
     """
     dir_path = self._dokanpath2pyfs(FileName)
     for (entry_name, entry_info) in self.fs.listdirinfo(dir_path):
         entry_path = combine(dir_path, entry_name)
         if not self._is_pending_delete(entry_path):
             find_data = self._info2finddataw(entry_path, entry_info)
             FillFindData(ctypes.byref(find_data), DokanFileInfo)
     return STATUS_SUCCESS
Beispiel #3
0
def import_transcript_from_fs(edx_video_id, language_code, file_name, provider,
                              resource_fs, static_dir):
    """
    Imports a transcript file from the file system and stores it as a transcript record.

    Nothing is imported when transcript data already exists for the video and
    language, when the file is missing, when it is not valid utf-8, or when its
    format cannot be determined; the last three cases are logged as warnings.

    Arguments:
        edx_video_id (str): Video id of the video.
        language_code (unicode): Language code of the requested transcript.
        file_name (unicode): File name of the transcript file.
        provider (unicode): Transcript provider.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    # An already existing transcript is left untouched.
    if get_video_transcript_data(edx_video_id, language_code):
        return

    # Load the transcript text from the import file system.
    try:
        with resource_fs.open(combine(static_dir, file_name),
                              'r',
                              encoding='utf-8-sig') as transcript_file:
            file_content = transcript_file.read()
    except ResourceNotFound:
        # A transcript missing from the course OLX is not fatal.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" is not found.',
            language_code, file_name, edx_video_id)
        return
    except UnicodeDecodeError:
        # Neither is a transcript that cannot be decoded as utf-8.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" contains a non-utf8 file content.',
            language_code, file_name, edx_video_id)
        return

    # Work out the transcript format from its content.
    try:
        detected_format = get_transcript_format(file_content)
    except Error:
        # Without a known format no record is created.
        logger.warning(
            '[edx-val] Error while getting transcript format for video=%s -- language_code=%s --file_name=%s',
            edx_video_id, language_code, file_name)
        return

    create_video_transcript(
        video_id=edx_video_id,
        language_code=language_code,
        file_format=detected_format,
        content=ContentFile(file_content),
        provider=provider,
    )
 def FindFilesWithPattern(self, FileName, SearchPattern, FillFindData,
                          DokanFileInfo):
     """Report directory entries matching SearchPattern via FillFindData.

     The Dokan path is converted with `_dokanpath2pyfs`. Entries flagged by
     `_is_pending_delete`, or whose name does not satisfy
     `libdokan.DokanIsNameInExpression`, are skipped.
     """
     FileName = self._dokanpath2pyfs(FileName)
     for nm in self.fs.listdir(FileName):
         fpath = combine(FileName, nm)
         if self._is_pending_delete(fpath):
             continue
         if not libdokan.DokanIsNameInExpression(SearchPattern, nm, True):
             continue
         # Query info only for entries that will be reported: this avoids a
         # needless lookup (and a possible failure) on skipped entries.
         finfo = self.fs.getinfo(fpath, namespaces=['basic', 'details'])
         data = self._info2finddataw(fpath, finfo, None)
         FillFindData(ctypes.byref(data), DokanFileInfo)
Beispiel #5
0
def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """
    Writes text data, encoded as utf-8, into a file on the given file system.

    Arguments:
        file_data (str): Data to store into the file.
        file_name (str): File name of the file to be created.
        file_system (OSFS): File system the file is written to.
        static_dir (str): Directory, within the file system, that receives the file.
    """
    target_path = combine(static_dir, file_name)
    with file_system.open(target_path, 'wb') as out_file:
        encoded = file_data.encode('utf-8')
        out_file.write(encoded)
Beispiel #6
0
def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """
    Writes text data, encoded as utf-8, into a file on the given file system.

    Arguments:
        file_data (str): Data to store into the file.
        file_name (str): File name of the file to be created.
        file_system (OSFS): File system the file is written to.
        static_dir (str): Directory, within the file system, that receives the file.
    """
    destination = combine(static_dir, file_name)
    with file_system.open(destination, 'wb') as handle:
        payload = file_data.encode('utf-8')
        handle.write(payload)
Beispiel #7
0
def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
    """
    Builds the transcripts xml of a video and exports one file per transcript.

    A `transcripts` child is appended to `video_el` when the video has
    transcripts; each successfully exported transcript adds a `transcript`
    element to it. Transcripts whose file generation fails are logged and skipped.

    Arguments:
        video_id (str): Video id of the video.
        video_el (Element): lxml Element object
        static_dir (str): The Directory to store transcript file.
        resource_fs (SubFS): The file system to store transcripts.

    Returns:
        dict holding the element under `xml` and a language code to file name
        mapping under `transcripts`
    """
    video_transcripts = VideoTranscript.objects.filter(video__edx_video_id=video_id).order_by('language_code')
    # the transcripts node is only added for videos that have transcripts
    if video_transcripts.exists():
        transcripts_el = SubElement(video_el, 'transcripts')

    exported_files = {}
    for transcript in video_transcripts:
        lang = transcript.language_code

        try:
            exported_name = create_transcript_file(
                video_id=video_id,
                language_code=lang,
                file_format=transcript.file_format,
                resource_fs=resource_fs.delegate_fs(),
                # File system should not start from /draft directory.
                static_dir=combine(u'course', static_dir),
            )
        except TranscriptsGenerationException:
            # a failing transcript must not stop the export: log it and carry on.
            logger.exception('[VAL] Error while generating "%s" transcript for video["%s"].', lang, video_id)
            continue

        exported_files[lang] = exported_name
        attributes = {
            'language_code': lang,
            'file_format': Transcript.SRT,
            'provider': transcript.provider,
        }
        SubElement(transcripts_el, 'transcript', attributes)

    return {'xml': video_el, 'transcripts': exported_files}
Beispiel #8
0
    def _get_module_info(self, fullname):
        if not fullname.startswith("__moyapy__"):
            raise ImportError(fullname)

        path = self._get_path(fullname)

        module_path, _type = self._find_module_file(path)
        if module_path is not None:
            return module_path, _type, False

        module_path, _type = self._find_module_file(combine(path, "__init__"))
        if module_path is not None:
            return module_path, _type, True

        raise ImportError(fullname)
Beispiel #9
0
 def iterkeys(self, root="/", m=None):
     """Iterate over all keys beginning with the given root path.

     `m` is the sub-map to walk; when omitted, the map is looked up from
     `self._map` by following the components of `root`, and nothing is
     yielded if a component is missing. An entry stored under an empty
     name marks a key at the current path.
     """
     # Normalize up front, as itervalues() and iteritems() do, so that
     # the yielded keys are normalized paths as well.
     root = normpath(root)
     if m is None:
         m = self._map
         for name in iteratepath(root):
             try:
                 m = m[name]
             except KeyError:
                 return
     for (nm, subm) in m.iteritems():
         if not nm:
             yield abspath(root)
         else:
             k = combine(root, nm)
             for subk in self.iterkeys(k, subm):
                 yield subk
Beispiel #10
0
 def itervalues(self, root="/", m=None):
     """Yield every value stored at or below the given root path.

     `m` is the sub-map to walk; when it is omitted the walk starts at the
     entry of `self._map` reached through the components of `root`, and
     nothing is yielded if a component is missing.
     """
     root = normpath(root)
     node = m
     if node is None:
         node = self._map
         for component in iteratepath(root):
             try:
                 node = node[component]
             except KeyError:
                 return
     for (entry_name, child) in node.iteritems():
         if entry_name:
             # Named entries are sub-maps: descend into them.
             child_path = combine(root, entry_name)
             for value in self.itervalues(child_path, child):
                 yield value
         else:
             # The empty name holds the value stored at this path.
             yield child
Beispiel #11
0
 def iteritems(self, root="/", m=None):
     """Yield every (key, value) pair stored at or below the given root.

     `m` is the sub-map to walk; when it is omitted the walk starts at the
     entry of `self._map` reached through the components of `root`, and
     nothing is yielded if a component is missing.
     """
     root = normpath(root)
     node = m
     if node is None:
         node = self._map
         for component in iteratepath(root):
             try:
                 node = node[component]
             except KeyError:
                 return
     for (entry_name, child) in node.iteritems():
         if entry_name:
             # Named entries are sub-maps: descend into them.
             child_path = combine(root, entry_name)
             for pair in self.iteritems(child_path, child):
                 (found_key, found_value) = pair
                 yield (found_key, found_value)
         else:
             # The empty name holds the value stored at this path.
             yield (abspath(normpath(root)), child)
Beispiel #12
0
    def handle(self, *args, **options):
        """Run the zip daemon: regenerate zips of image sets that are not READY.

        Takes an inter-process lock on ``zip.lock`` under
        ``settings.IMAGE_PATH`` so that only one daemon runs, then loops
        forever, regenerating every non-ready image set and sleeping ten
        seconds between passes.

        Raises:
            CommandError: if zip download is disabled in the settings, or if
                the lock is already held.
        """
        if not settings.ENABLE_ZIP_DOWNLOAD:
            raise CommandError(
                'To enable zip download, set ENABLE_ZIP_DOWNLOAD to True in settings.py'
            )

        lock = fasteners.InterProcessLock(
            path.combine(settings.IMAGE_PATH, 'zip.lock'))
        # Non-blocking: a held lock means another daemon is already running.
        if not lock.acquire(blocking=False):
            raise CommandError(
                'The lockfile is present. There seems to be another instance of the zip daemon running.\n'
                'Please stop it before starting a new one.\n'
                'If this problem persists, delete {}.\n'.format(lock.path))

        try:
            while True:
                for imageset in ImageSet.objects.filter(~Q(
                        zip_state=ImageSet.ZipState.READY)):
                    self._regenerate_zip(imageset)
                sleep(10)
        finally:
            # Release the lock however the loop ends (error or interrupt).
            lock.release()
Beispiel #13
0
    def collect_xml(self, acron, xml):
        """Copy an XML file into its own package folder on the output file system.

        The package folder is ``<acron>/<issue folder>/<file name without
        extension>``, where the issue folder is the name of the directory
        containing `xml`. The folder is created on `self.out_fs` and the XML
        is copied into it under its original file name.

        Returns:
            tuple of (issue folder name, file name without extension).
        """
        xml_file_name = path.basename(xml)
        package_name = path.splitext(xml_file_name)[0]
        issue_dir = path.basename(path.dirname(xml))

        package_dir = path.join(acron, issue_dir, package_name)
        logging.info("Make dir package: %s" % package_dir)
        self.out_fs.makedirs(package_dir, recreate=True)

        source_xml = path.combine(acron, xml)
        destination_xml = path.join(acron, issue_dir, package_name,
                                    xml_file_name)
        self.copy(source_xml, destination_xml)

        return issue_dir, package_name
Beispiel #14
0
 def tmp_path(self):
     """Return `self.path` combined onto `settings.TMP_IMAGE_PATH`."""
     combined = path.combine(settings.TMP_IMAGE_PATH, self.path)
     return combined
Beispiel #15
0
 def zip_path(self):
     """Return the zip name combined onto the root path."""
     root_dir = self.root_path()
     archive_name = self.zip_name()
     return path.combine(root_dir, archive_name)
Beispiel #16
0
def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
    """
    Creates xml for transcripts.
    For each transcript element, an associated transcript file is also created in course OLX.

    Arguments:
        video_id (str): Video id of the video.
        video_el (Element): lxml Element object
        static_dir (str): The Directory to store transcript file.
        resource_fs (SubFS|WrapFS): The file system to store transcripts.

    resource_fs is usually a SubFS, but can be a WrapFS in places like exporting olx through the olx_rest_api.
    This makes a difference because WrapFS does not have the _sub_dir attribute.

    Returns:
        dict with two keys: `xml` (the `video_el` passed in, with a `transcripts`
        child added when the video has transcripts) and `transcripts` (a mapping
        of language code to the exported transcript file name).
    """
    video_transcripts = VideoTranscript.objects.filter(video__edx_video_id=video_id).order_by('language_code')
    # create transcripts node only when we have transcripts for a video
    if video_transcripts.exists():
        transcripts_el = SubElement(video_el, 'transcripts')

    # Note: file system should not start from /draft directory.
    static_file_dir = combine('course', static_dir)
    # If we're in a sub directory (ie. a SubFS instead of a WrapFS),
    # we need to try to base the static file directory on the second path segment,
    # which will be the course run part for old mongodb key format courses.
    # See https://openedx.atlassian.net/browse/TNL-7338
    if hasattr(resource_fs, '_sub_dir'):
        try:
            static_file_dir = combine(resource_fs._sub_dir.split('/')[1], static_dir)  # pylint: disable=protected-access
        except (IndexError, KeyError):
            # Indexing the split path raises IndexError when there is no second
            # segment; keep the default directory computed above in that case.
            logger.exception(
                "VAL Transcript Export: Error creating static directory path for video {} in file system {}".format(
                    video_id, resource_fs
                )
            )

    transcript_files_map = {}
    for video_transcript in video_transcripts:
        language_code = video_transcript.language_code
        file_format = video_transcript.file_format

        try:
            transcript_filename = create_transcript_file(
                video_id=video_id,
                language_code=language_code,
                file_format=file_format,
                resource_fs=resource_fs.delegate_fs(),
                static_dir=static_file_dir
            )
            transcript_files_map[language_code] = transcript_filename
        except TranscriptsGenerationException:
            # we don't want to halt export in this case, just log and move to the next transcript.
            logger.exception('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id)
            continue

        # The element always advertises SRT, regardless of the stored file format.
        SubElement(
            transcripts_el,
            'transcript',
            {
                'language_code': language_code,
                'file_format': Transcript.SRT,
                'provider': video_transcript.provider,
            }
        )

    return dict(xml=video_el, transcripts=transcript_files_map)
Beispiel #17
0
 def join(self, name):
     """Return a new Path for `name` combined onto this route, keeping the protocol."""
     joined_route = fp.combine(self.route, name)
     return Path(joined_route, self.protocol)
Beispiel #18
0
 def __truediv__(self, name):
     """Support ``path / name``: return a new Path with `name` combined onto `_path`."""
     child_path = fp.combine(self._path, name)
     return Path(child_path)
Beispiel #19
0
def import_transcript_from_fs(edx_video_id, language_code, file_name, provider, resource_fs, static_dir):
    """
    Imports a transcript file from the file system and creates or updates its transcript record.

    An existing transcript is only replaced when overriding imported transcripts
    is enabled and the new file is not a duplicate of the stored one. Missing
    files, non-utf8 content and unknown formats are logged as warnings and skipped.

    Arguments:
        edx_video_id (str): Video id of the video.
        language_code (unicode): Language code of the requested transcript.
        file_name (unicode): File name of the transcript file.
        provider (unicode): Transcript provider.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    current_transcript = VideoTranscript.get_or_none(edx_video_id, language_code)

    # an existing transcript may only be replaced when overriding is enabled
    if current_transcript:
        if not OVERRIDE_EXISTING_IMPORTED_TRANSCRIPTS.is_enabled():
            return

    # Load the transcript text from the import file system.
    try:
        with resource_fs.open(combine(static_dir, file_name), 'r', encoding='utf-8-sig') as transcript_file:
            file_content = transcript_file.read()
    except ResourceNotFound:
        # A transcript missing from the course OLX is not fatal.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" is not found.',
            language_code, file_name, edx_video_id,
        )
        return
    except UnicodeDecodeError:
        # Neither is a transcript that cannot be decoded as utf-8.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" contains a non-utf8 file content.',
            language_code, file_name, edx_video_id,
        )
        return

    # wrap the utf8 encoded content for storage
    new_transcript_content_file = ContentFile(file_content.encode('utf-8'))

    # importing a copy of what is already stored would be pointless
    if current_transcript:
        if is_duplicate_file(new_transcript_content_file, current_transcript.transcript.file):
            return

    # Work out the transcript format from its content.
    try:
        detected_format = get_transcript_format(file_content)
    except Error:
        # Without a known format no record is created.
        logger.warning(
            '[edx-val] Error while getting transcript format for video=%s -- language_code=%s --file_name=%s',
            edx_video_id, language_code, file_name,
        )
        return

    transcript_metadata = {
        'provider': provider,
        'file_format': detected_format,
        'language_code': language_code,
    }
    create_or_update_video_transcript(
        video_id=edx_video_id,
        language_code=language_code,
        metadata=transcript_metadata,
        file_data=new_transcript_content_file,
    )
Beispiel #20
0
 def __truediv__(self, name):
     """Support ``path / name``: return a new Path with `name` combined onto `abs`, keeping `url_spec`."""
     child_path = fp.combine(self.abs, name)
     return Path(child_path, self.url_spec)
Beispiel #21
0
 def root_path(self):
     """Return `self.path` combined onto `settings.IMAGE_PATH`."""
     combined = path.combine(settings.IMAGE_PATH, self.path)
     return combined
Beispiel #22
0
 def tmp_zip_path(self):
     """Return the zip name combined onto the temporary path."""
     tmp_dir = self.tmp_path()
     archive_name = self.zip_name()
     return path.combine(tmp_dir, archive_name)
Beispiel #23
0
def import_transcript_from_fs(edx_video_id, language_code, file_name, provider, resource_fs, static_dir):
    """
    Imports transcript file from file system and creates transcript record in DS.

    Nothing is imported when transcript data already exists for the video and
    language, when the file is missing, when it is not valid utf-8, or when its
    format cannot be determined; the last three cases are logged as warnings.

    Arguments:
        edx_video_id (str): Video id of the video.
        language_code (unicode): Language code of the requested transcript.
        file_name (unicode): File name of the transcript file.
        provider (unicode): Transcript provider.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    # Only import when the transcript record does not exist yet.
    if get_video_transcript_data(edx_video_id, language_code):
        return

    # Read file from import file system and attach it to transcript record in DS.
    try:
        with resource_fs.open(combine(static_dir, file_name), 'rb') as f:
            file_content = f.read()
            # 'utf-8-sig' also strips a leading byte order mark, if any.
            file_content = file_content.decode('utf-8-sig')
    except ResourceNotFound:
        # Don't raise exception in case transcript file is not found in course OLX.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" is not found.',
            language_code,
            file_name,
            edx_video_id
        )
        return
    except UnicodeDecodeError:
        # Don't raise exception in case transcript contains non-utf8 content.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" contains a non-utf8 file content.',
            language_code,
            file_name,
            edx_video_id
        )
        return

    # Get file format from transcript content.
    try:
        file_format = get_transcript_format(file_content)
    except Error:
        # Don't raise exception, just don't create transcript record.
        logger.warning(
            '[edx-val] Error while getting transcript format for video=%s -- language_code=%s --file_name=%s',
            edx_video_id,
            language_code,
            file_name
        )
        return

    # Create transcript record.
    create_video_transcript(
        video_id=edx_video_id,
        language_code=language_code,
        file_format=file_format,
        content=ContentFile(file_content),
        provider=provider
    )
Beispiel #24
0
 def path(self):
     """Return `self.filename` combined onto the image set's root path."""
     set_root = self.image_set.root_path()
     return path.combine(set_root, self.filename)
Beispiel #25
0
    def __init__(self, filepath, base_folder, force_reopen_files, poll_time,
                 window):
        """Set up the experiment state and start a reader in its own thread.

        The experiment registers itself with `window` through `on_exp_init`,
        then creates an ExperimentReader, wires the reader's signals to this
        object's slots, moves the reader to a new QThread and starts it.
        """
        # Split at the last '/' (forward slash guaranteed by pyfilesystem)
        # to separate the file name from the directory part of the path.
        rel_path, _, filename = filepath.rpartition('/')
        # The experiment name is the directory path, relative to the base
        # folder, without its first character.
        self.name = rel_path[1:]
        self.filename = filename
        self.directory = fs_path.combine(base_folder, rel_path)

        self.meta = {}
        self.metrics = []  # names of metrics
        self.data = []  # data for each metric (one list per metric)
        self.done = False  # true after reading and the experiment is done writing too

        # Start hidden if the user hid it the last time (persistent setting);
        # the stored entry is consumed once it has been applied.
        previously_hidden = self.directory in window.hidden_exp_paths
        if previously_hidden:
            del window.hidden_exp_paths[self.directory]
        self.visible = not previously_hidden

        self.is_selected = False
        self.is_filtered = False
        self.style_idx = None  # style index, for when it's assigned (see Plots.assign_exp_style)

        # Register this experiment with the main window.
        self.window = window
        self.table_row = None  # used internally by the window
        window.on_exp_init(self)

        logger.debug(f"Initializing ExperimentReader and thread to load {self.name}")

        # Create the reader object and the thread that will host it.
        self.reader = ExperimentReader(
            filename=filename,
            directory=self.directory,
            force_reopen_files=force_reopen_files,
            poll_time=poll_time,
            name=self.name,
        )
        self.thread = QThread()

        # Connect the reader's signals to this object's slots, to return data.
        reader = self.reader
        reader.meta_ready.connect(self.on_meta_ready)
        reader.header_ready.connect(self.on_header_ready)
        reader.data_ready.connect(self.on_data_ready)
        reader.done.connect(self.on_done)

        # Move the reader object to the thread.
        reader.moveToThread(self.thread)

        # The reader's done signal terminates the thread; the thread's
        # started signal kicks off reading.
        reader.done.connect(self.thread.quit)
        self.thread.started.connect(reader.start_reading)

        self.thread.start()

        logger.debug(f"Moved ExperimentReader to thread, for {self.name}")
Beispiel #26
0
 def test_combine(self):
     """combine() returns the second part alone for an empty first part, else joins with '/'."""
     expectations = (
         (("", "bar"), "bar"),
         (("foo", "bar"), "foo/bar"),
     )
     for parts, expected in expectations:
         self.assertEqual(combine(*parts), expected)
Beispiel #27
0
 def relative_zip_path(self):
     """Return the zip name combined onto `self.path`."""
     archive_name = self.zip_name()
     return path.combine(self.path, archive_name)
Beispiel #28
0
 def relative_path(self):
     """Return `self.filename` combined onto the image set's `path`."""
     set_path = self.image_set.path
     return path.combine(set_path, self.filename)