def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
    """
    Creates xml for transcripts.
    For each transcript element, an associated transcript file is also created in course OLX.

    Arguments:
        video_id (str): Video id of the video.
        video_el (Element): lxml Element object
        static_dir (str): The Directory to store transcript file.
        resource_fs (SubFS): The file system to store transcripts.

    Returns:
        dict with the video element (``xml``) and a mapping of language
        code to exported transcript file name (``transcripts``).
    """
    video_transcripts = VideoTranscript.objects.filter(
        video__edx_video_id=video_id).order_by('language_code')
    # Initialize up-front so the return statement below cannot hit a
    # NameError when the video has no transcripts at all.
    transcript_files_map = {}
    # create transcripts node only when we have transcripts for a video
    if video_transcripts.exists():
        transcripts_el = SubElement(video_el, 'transcripts')
        # Create static directory based on the file system's subdirectory,
        # falling back to default path in case of an error
        try:
            # File system should not start from /draft directory.
            # BUG FIX: split('/')[1] raises IndexError (not KeyError) when
            # `_sub_dir` has fewer than two path segments, so catch both.
            static_file_dir = combine(
                resource_fs._sub_dir.split('/')[1], static_dir)  # pylint: disable=protected-access
        except (IndexError, KeyError):
            logger.exception(
                "VAL Transcript Export: Error creating static directory path for video {} in file system {}"
                .format(video_id, resource_fs))
            static_file_dir = combine('course', static_dir)
        for video_transcript in video_transcripts:
            language_code = video_transcript.language_code
            file_format = video_transcript.file_format
            try:
                transcript_filename = create_transcript_file(
                    video_id=video_id,
                    language_code=language_code,
                    file_format=file_format,
                    resource_fs=resource_fs.delegate_fs(),
                    static_dir=static_file_dir)
                transcript_files_map[language_code] = transcript_filename
            except TranscriptsGenerationException:
                # we don't want to halt export in this case, just log and move to the next transcript.
                logger.exception(
                    '[VAL] Error while generating "%s" transcript for video["%s"].',
                    language_code, video_id)
                continue
            SubElement(
                transcripts_el,
                'transcript',
                {
                    'language_code': language_code,
                    'file_format': Transcript.SRT,
                    'provider': video_transcript.provider,
                })
    return dict(xml=video_el, transcripts=transcript_files_map)
def FindFiles(self, FileName, FillFindData, DokanFileInfo):
    """Enumerate the directory's entries, reporting each one to Dokan
    through the *FillFindData* callback."""
    FileName = self._dokanpath2pyfs(FileName)
    for (entry_name, entry_info) in self.fs.listdirinfo(FileName):
        entry_path = combine(FileName, entry_name)
        # Entries already queued for deletion must not be reported back.
        if self._is_pending_delete(entry_path):
            continue
        find_data = self._info2finddataw(entry_path, entry_info)
        FillFindData(ctypes.byref(find_data), DokanFileInfo)
    return STATUS_SUCCESS
def import_transcript_from_fs(edx_video_id, language_code, file_name, provider, resource_fs, static_dir):
    """
    Imports transcript file from file system and creates transcript record in DS.

    Arguments:
        edx_video_id (str): Video id of the video.
        language_code (unicode): Language code of the requested transcript.
        file_name (unicode): File name of the transcript file.
        provider (unicode): Transcript provider.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    file_format = None

    # A transcript record that already exists is left untouched.
    if get_video_transcript_data(edx_video_id, language_code):
        return

    # Read file from import file system and attach it to transcript record in DS.
    transcript_path = combine(static_dir, file_name)
    try:
        with resource_fs.open(transcript_path, 'r', encoding='utf-8-sig') as transcript_file:
            file_content = transcript_file.read()
    except ResourceNotFound:
        # Don't raise exception in case transcript file is not found in course OLX.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" is not found.',
            language_code, file_name, edx_video_id)
        return
    except UnicodeDecodeError:
        # Don't raise exception in case transcript contains non-utf8 content.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" contains a non-utf8 file content.',
            language_code, file_name, edx_video_id)
        return

    # Get file format from transcript content.
    try:
        file_format = get_transcript_format(file_content)
    except Error:
        # Don't raise exception, just don't create transcript record.
        logger.warning(
            '[edx-val] Error while getting transcript format for video=%s -- language_code=%s --file_name=%s',
            edx_video_id, language_code, file_name)
        return

    # Create transcript record.
    create_video_transcript(
        video_id=edx_video_id,
        language_code=language_code,
        file_format=file_format,
        content=ContentFile(file_content),
        provider=provider)
def FindFilesWithPattern(self, FileName, SearchPattern, FillFindData, DokanFileInfo):
    """Enumerate directory entries matching *SearchPattern*, reporting each
    via the *FillFindData* callback.

    BUG FIX: the original fell off the end and returned None.  This method
    is registered as a ctypes callback with an integer result type, so it
    must return an NTSTATUS value — return STATUS_SUCCESS like FindFiles.
    """
    FileName = self._dokanpath2pyfs(FileName)
    for nm in self.fs.listdir(FileName):
        fpath = combine(FileName, nm)
        # Check pending-delete *before* getinfo: no point stat-ing an entry
        # that will be filtered out anyway.
        if self._is_pending_delete(fpath):
            continue
        if not libdokan.DokanIsNameInExpression(SearchPattern, nm, True):
            continue
        finfo = self.fs.getinfo(fpath, namespaces=['basic', 'details'])
        data = self._info2finddataw(fpath, finfo, None)
        FillFindData(ctypes.byref(data), DokanFileInfo)
    return STATUS_SUCCESS
def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """
    Writes file in specific file system.

    Arguments:
        file_data (str): Data to store into the file.
        file_name (str): File name of the file to be created.
        file_system (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    file_path = combine(static_dir, file_name)
    with file_system.open(file_path, 'wb') as output_file:
        output_file.write(file_data.encode('utf-8'))
def create_file_in_fs(file_data, file_name, file_system, static_dir):
    """Write *file_data*, utf-8 encoded, to *file_name* under *static_dir*
    of the given *file_system* (OSFS)."""
    encoded_data = file_data.encode('utf-8')
    with file_system.open(combine(static_dir, file_name), 'wb') as out:
        out.write(encoded_data)
def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
    """
    Creates xml for transcripts.
    For each transcript element, an associated transcript file is also created in course OLX.

    Arguments:
        video_id (str): Video id of the video.
        video_el (Element): lxml Element object
        static_dir (str): The Directory to store transcript file.
        resource_fs (SubFS): The file system to store transcripts.

    Returns:
        dict with the video element (``xml``) and a mapping of language
        code to exported transcript file name (``transcripts``).
    """
    video_transcripts = VideoTranscript.objects.filter(video__edx_video_id=video_id).order_by('language_code')
    # ROBUSTNESS FIX: initialize before the exists() guard so the return
    # below cannot raise NameError when a video has no transcripts.
    transcript_files_map = {}
    # create transcripts node only when we have transcripts for a video
    if video_transcripts.exists():
        transcripts_el = SubElement(video_el, 'transcripts')
        for video_transcript in video_transcripts:
            language_code = video_transcript.language_code
            file_format = video_transcript.file_format
            try:
                transcript_filename = create_transcript_file(
                    video_id=video_id,
                    language_code=language_code,
                    file_format=file_format,
                    resource_fs=resource_fs.delegate_fs(),
                    static_dir=combine(u'course', static_dir)  # File system should not start from /draft directory.
                )
                transcript_files_map[language_code] = transcript_filename
            except TranscriptsGenerationException:
                # we don't want to halt export in this case, just log and move to the next transcript.
                logger.exception('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id)
                continue
            SubElement(
                transcripts_el,
                'transcript',
                {
                    'language_code': language_code,
                    'file_format': Transcript.SRT,
                    'provider': video_transcript.provider,
                }
            )
    return dict(xml=video_el, transcripts=transcript_files_map)
def _get_module_info(self, fullname):
    """Resolve *fullname* to ``(module_path, type, is_package)``.

    Tries a plain module file first, then a package ``__init__``;
    raises ImportError when neither exists or the name is not ours.
    """
    if not fullname.startswith("__moyapy__"):
        raise ImportError(fullname)
    base_path = self._get_path(fullname)
    # Probe the module file, then the package __init__, in that order.
    candidates = ((base_path, False), (combine(base_path, "__init__"), True))
    for candidate_path, is_package in candidates:
        found_path, found_type = self._find_module_file(candidate_path)
        if found_path is not None:
            return found_path, found_type, is_package
    raise ImportError(fullname)
def iterkeys(self, root="/", m=None):
    """Iterate over all keys beginning with the given root path.

    Arguments:
        root (str): path prefix to iterate under.
        m (dict): internal sub-map for recursion; callers leave it None.
    """
    # CONSISTENCY FIX: normalize the root like itervalues()/iteritems()
    # do, so equivalent un-normalized roots yield the same keys.
    root = normpath(root)
    if m is None:
        m = self._map
    # Descend to the node for `root`; a missing segment means no keys.
    for name in iteratepath(root):
        try:
            m = m[name]
        except KeyError:
            return
    for (nm, subm) in m.iteritems():
        if not nm:
            # The empty-name entry marks a value stored at `root` itself.
            yield abspath(root)
        else:
            k = combine(root, nm)
            for subk in self.iterkeys(k, subm):
                yield subk
def itervalues(self, root="/", m=None):
    """Iterate over all values whose keys begin with the given root path."""
    root = normpath(root)
    node = self._map if m is None else m
    # Walk down to the node addressed by `root`; absent segments mean
    # there is nothing to yield.
    for segment in iteratepath(root):
        try:
            node = node[segment]
        except KeyError:
            return
    for (child_name, child_node) in node.iteritems():
        if not child_name:
            # Empty name marks the value stored at `root` itself.
            yield child_node
        else:
            child_root = combine(root, child_name)
            for value in self.itervalues(child_root, child_node):
                yield value
def iteritems(self, root="/", m=None):
    """Iterate over all (key,value) pairs beginning with the given root."""
    root = normpath(root)
    node = self._map if m is None else m
    # Walk down to the node addressed by `root`; absent segments mean
    # there is nothing to yield.
    for segment in iteratepath(root):
        try:
            node = node[segment]
        except KeyError:
            return
    for (child_name, child_node) in node.iteritems():
        if not child_name:
            # Empty name marks the value stored at `root` itself.
            yield (abspath(normpath(root)), child_node)
        else:
            child_root = combine(root, child_name)
            for pair in self.iteritems(child_root, child_node):
                yield pair
def handle(self, *args, **options):
    """Run the zip regeneration daemon.

    Guards against concurrent instances with an inter-process lock, then
    loops forever regenerating any image set whose zip is not READY.
    """
    if not settings.ENABLE_ZIP_DOWNLOAD:
        raise CommandError(
            'To enable zip download, set ENABLE_ZIP_DOWNLOAD to True in settings.py'
        )
    lock = fasteners.InterProcessLock(
        path.combine(settings.IMAGE_PATH, 'zip.lock'))
    # Guard clause: bail out immediately if another daemon holds the lock.
    if not lock.acquire(blocking=False):
        raise CommandError(
            'The lockfile is present. There seems to be another instance of the zip daemon running.\n'
            'Please stop it before starting a new one.\n'
            'If this problem persists, delete {}.\n'.format(lock.path))
    while True:
        stale_sets = ImageSet.objects.filter(~Q(zip_state=ImageSet.ZipState.READY))
        for imageset in stale_sets:
            self._regenerate_zip(imageset)
        sleep(10)
def collect_xml(self, acron, xml):
    """Create the package folder for *xml* and copy the XML file into it.

    Returns a ``(issue_folder, file_name)`` tuple identifying the package.
    """
    issue_folder = path.basename(path.dirname(xml))
    file_name_with_ext = path.basename(xml)
    file_name, _ext = path.splitext(file_name_with_ext)
    package_folder = path.join(acron, issue_folder, file_name)
    logging.info("Make dir package: %s" % package_folder)
    self.out_fs.makedirs(package_folder, recreate=True)
    source_xml_path = path.combine(acron, xml)
    target_xml_path = path.join(package_folder, file_name_with_ext)
    self.copy(source_xml_path, target_xml_path)
    return issue_folder, file_name
def tmp_path(self):
    """Path of this object under the temporary image root."""
    tmp_root = settings.TMP_IMAGE_PATH
    return path.combine(tmp_root, self.path)
def zip_path(self):
    """Location of the zip archive inside this object's root path."""
    archive_name = self.zip_name()
    return path.combine(self.root_path(), archive_name)
def create_transcripts_xml(video_id, video_el, resource_fs, static_dir):
    """
    Creates xml for transcripts.
    For each transcript element, an associated transcript file is also created in course OLX.

    Arguments:
        video_id (str): Video id of the video.
        video_el (Element): lxml Element object
        static_dir (str): The Directory to store transcript file.
        resource_fs (SubFS|WrapFS): The file system to store transcripts.
            resource_fs is usually a SubFS, but can be a WrapFS in places
            like exporting olx through the olx_rest_api. This makes a
            difference because WrapFS does not have the _sub_dir attribute.

    Returns:
        dict with the video element (``xml``) and a mapping of language
        code to exported transcript file name (``transcripts``).
    """
    video_transcripts = VideoTranscript.objects.filter(video__edx_video_id=video_id).order_by('language_code')
    # ROBUSTNESS FIX: initialize before the exists() guard so the return
    # below cannot raise NameError when a video has no transcripts.
    transcript_files_map = {}
    # create transcripts node only when we have transcripts for a video
    if video_transcripts.exists():
        transcripts_el = SubElement(video_el, 'transcripts')

        # Note: file system should not start from /draft directory.
        static_file_dir = combine('course', static_dir)
        # If we're in a sub directory (ie. a SubFS instead of a WrapFS),
        # we need to try to base the static file directory on the second path segment,
        # which will be the course run part for old mongodb key format courses.
        # See https://openedx.atlassian.net/browse/TNL-7338
        if hasattr(resource_fs, '_sub_dir'):
            try:
                # BUG FIX: split('/')[1] raises IndexError (not KeyError)
                # when `_sub_dir` has fewer than two segments; catch both so
                # the fallback path above is actually used.
                static_file_dir = combine(resource_fs._sub_dir.split('/')[1], static_dir)  # pylint: disable=protected-access
            except (IndexError, KeyError):
                logger.exception(
                    "VAL Transcript Export: Error creating static directory path for video {} in file system {}".format(
                        video_id, resource_fs
                    )
                )
        for video_transcript in video_transcripts:
            language_code = video_transcript.language_code
            file_format = video_transcript.file_format
            try:
                transcript_filename = create_transcript_file(
                    video_id=video_id,
                    language_code=language_code,
                    file_format=file_format,
                    resource_fs=resource_fs.delegate_fs(),
                    static_dir=static_file_dir
                )
                transcript_files_map[language_code] = transcript_filename
            except TranscriptsGenerationException:
                # we don't want to halt export in this case, just log and move to the next transcript.
                logger.exception('[VAL] Error while generating "%s" transcript for video["%s"].', language_code, video_id)
                continue
            SubElement(
                transcripts_el,
                'transcript',
                {
                    'language_code': language_code,
                    'file_format': Transcript.SRT,
                    'provider': video_transcript.provider,
                }
            )
    return dict(xml=video_el, transcripts=transcript_files_map)
def join(self, name):
    """Return a new Path with *name* appended to this route, keeping the
    same protocol."""
    joined_route = fp.combine(self.route, name)
    return Path(joined_route, self.protocol)
def __truediv__(self, name):
    """Support ``path / name`` by combining *name* onto the wrapped path."""
    combined = fp.combine(self._path, name)
    return Path(combined)
def import_transcript_from_fs(edx_video_id, language_code, file_name, provider, resource_fs, static_dir):
    """
    Imports transcript file from file system and creates transcript record in DS.

    Arguments:
        edx_video_id (str): Video id of the video.
        language_code (unicode): Language code of the requested transcript.
        file_name (unicode): File name of the transcript file.
        provider (unicode): Transcript provider.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    file_format = None
    existing_transcript = VideoTranscript.get_or_none(edx_video_id, language_code)

    # An already-existing transcript may only be re-imported when the
    # override switch is enabled.
    if existing_transcript and not OVERRIDE_EXISTING_IMPORTED_TRANSCRIPTS.is_enabled():
        return

    # Read the transcript file out of the import file system.
    transcript_path = combine(static_dir, file_name)
    try:
        with resource_fs.open(transcript_path, 'r', encoding='utf-8-sig') as transcript_file:
            file_content = transcript_file.read()
    except ResourceNotFound:
        # Don't raise exception in case transcript file is not found in course OLX.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" is not found.',
            language_code, file_name, edx_video_id
        )
        return
    except UnicodeDecodeError:
        # Don't raise exception in case transcript contains non-utf8 content.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" contains a non-utf8 file content.',
            language_code, file_name, edx_video_id
        )
        return

    # Normalize the content to utf-8 before storing it.
    new_transcript_content_file = ContentFile(file_content.encode('utf-8'))

    # Skip the import when it would only duplicate the transcript we
    # already have on record.
    if existing_transcript and is_duplicate_file(new_transcript_content_file, existing_transcript.transcript.file):
        return

    # Get file format from transcript content.
    try:
        file_format = get_transcript_format(file_content)
    except Error:
        # Don't raise exception, just don't create transcript record.
        logger.warning(
            '[edx-val] Error while getting transcript format for video=%s -- language_code=%s --file_name=%s',
            edx_video_id, language_code, file_name
        )
        return

    # Create transcript record.
    create_or_update_video_transcript(
        video_id=edx_video_id,
        language_code=language_code,
        metadata={
            'provider': provider,
            'file_format': file_format,
            'language_code': language_code,
        },
        file_data=new_transcript_content_file,
    )
def __truediv__(self, name):
    """Support ``path / name``, preserving this path's url spec."""
    child_path = fp.combine(self.abs, name)
    return Path(child_path, self.url_spec)
def root_path(self):
    """Path of this object under the configured image root."""
    image_root = settings.IMAGE_PATH
    return path.combine(image_root, self.path)
def tmp_zip_path(self):
    """Location of the zip archive inside the temporary path."""
    archive_name = self.zip_name()
    return path.combine(self.tmp_path(), archive_name)
def import_transcript_from_fs(edx_video_id, language_code, file_name, provider, resource_fs, static_dir):
    """
    Imports transcript file from file system and creates transcript record in DS.

    Arguments:
        edx_video_id (str): Video id of the video.
        language_code (unicode): Language code of the requested transcript.
        file_name (unicode): File name of the transcript file.
        provider (unicode): Transcript provider.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    file_format = None
    transcript_data = get_video_transcript_data(edx_video_id, language_code)
    # First check if transcript record does not exist.
    if not transcript_data:
        # Read file from import file system and attach it to transcript record in DS.
        try:
            with resource_fs.open(combine(static_dir, file_name), 'rb') as f:
                file_content = f.read()
                file_content = file_content.decode('utf-8-sig')
        except ResourceNotFound:
            # Don't raise exception in case transcript file is not found in course OLX.
            # FIX: use logger.warning — Logger.warn is deprecated.
            logger.warning(
                '[edx-val] "%s" transcript "%s" for video "%s" is not found.',
                language_code, file_name, edx_video_id
            )
            return
        except UnicodeDecodeError:
            # Don't raise exception in case transcript contains non-utf8 content.
            logger.warning(
                '[edx-val] "%s" transcript "%s" for video "%s" contains a non-utf8 file content.',
                language_code, file_name, edx_video_id
            )
            return

        # Get file format from transcript content.
        try:
            file_format = get_transcript_format(file_content)
        except Error:
            # Don't raise exception, just don't create transcript record.
            logger.warning(
                '[edx-val] Error while getting transcript format for video=%s -- language_code=%s --file_name=%s',
                edx_video_id, language_code, file_name
            )
            return

        # Create transcript record.
        create_video_transcript(
            video_id=edx_video_id,
            language_code=language_code,
            file_format=file_format,
            content=ContentFile(file_content),
            provider=provider
        )
def path(self):
    """Full path of this image inside its image set's root path."""
    set_root = self.image_set.root_path()
    return path.combine(set_root, self.filename)
def __init__(self, filepath, base_folder, force_reopen_files, poll_time, window):
    """Set up an experiment entry: derive its name and directory from
    *filepath*, register it with the main *window*, then spawn an
    ExperimentReader on its own QThread to stream the data in.

    NOTE(review): the order of the signal connections, moveToThread and
    thread.start() calls below is significant for Qt — do not reorder.
    """
    (rel_path, _, filename) = filepath.rpartition(
        '/'
    )  # extract '/stats.csv' from path; forward slash guaranteed by pyfilesystem
    self.name = rel_path[
        1:]  # the experiment name is the directory path, relative to the base folder
    self.filename = filename
    self.directory = fs_path.combine(base_folder, rel_path)
    self.meta = {}  # experiment metadata, filled in by on_meta_ready
    self.metrics = []  # names of metrics
    self.data = []  # data for each metric (one list per metric)
    self.done = False  # true after reading and the experiment is done writing too
    # start hidden if the user hid it the last time (this is a persistent setting)
    if self.directory in window.hidden_exp_paths:
        self.visible = False
        del window.hidden_exp_paths[self.directory]
    else:
        self.visible = True
    self.is_selected = False
    self.is_filtered = False
    self.style_idx = None  # style index, for when it's assigned (see Plots.assign_exp_style)
    # register this experiment with the main window
    self.window = window
    self.table_row = None  # used internally by the window
    window.on_exp_init(self)
    logger.debug(
        f"Initializing ExperimentReader and thread to load {self.name}")
    # create reader object and thread
    self.reader = ExperimentReader(filename=filename,
                                   directory=self.directory,
                                   force_reopen_files=force_reopen_files,
                                   poll_time=poll_time,
                                   name=self.name)
    self.thread = QThread()
    # connect ExperimentReader's signals to Experiment method slots, to return data
    self.reader.meta_ready.connect(self.on_meta_ready)
    self.reader.header_ready.connect(self.on_header_ready)
    self.reader.data_ready.connect(self.on_data_ready)
    self.reader.done.connect(self.on_done)
    self.reader.moveToThread(
        self.thread)  # move the reader object to the thread
    self.reader.done.connect(
        self.thread.quit
    )  # connect reader done signal to terminate thread slot
    self.thread.started.connect(
        self.reader.start_reading
    )  # connect thread started signal to reader slot
    self.thread.start()  # start thread
    logger.debug(f"Moved ExperimentReader to thread, for {self.name}")
def test_combine(self):
    """combine() joins two segments and ignores an empty first segment."""
    cases = (
        (("", "bar"), "bar"),
        (("foo", "bar"), "foo/bar"),
    )
    for args, expected in cases:
        self.assertEqual(combine(*args), expected)
def relative_zip_path(self):
    """Zip archive location relative to this object's own path."""
    archive_name = self.zip_name()
    return path.combine(self.path, archive_name)
def relative_path(self):
    """Path of this image relative to its image set's path."""
    set_path = self.image_set.path
    return path.combine(set_path, self.filename)