def train_model_pvdm(directory, language):
    """Train a Doc2Vec model on the documents loaded from *directory*.

    When ``language`` equals ``['tags']`` the file search uses ``['tag']``
    and the documents are loaded with the ``"en-text"`` setting; in every
    other case the search uses ``[language]`` and the documents are loaded
    with ``language`` itself.

    Returns the trained ``Doc2Vec`` model, or ``0`` when ``load_documents``
    yields nothing. All hyper-parameters come from the ``pvdm_*`` names
    defined outside this function.
    """
    if language == ['tags']:
        search_types, load_language = ['tag'], "en-text"
    else:
        search_types, load_language = [language], language

    doc = load_documents(findFiles(directory, search_types), load_language)
    if not doc:
        # Nothing to train on; callers receive 0 instead of a model.
        return 0

    settings = {
        'size': pvdm_size,
        'min_count': pvdm_min_count,
        'window': pvdm_window,
        'negative': pvdm_negative,
        'workers': pvdm_workers,
        'sample': pvdm_sample,
    }
    return gs.models.doc2vec.Doc2Vec(doc, **settings)
def copy_main_folders(downloadPath, identifier, downloadedFile):
    """Copy selected files of a download into a flat ``<identifier>`` folder.

    The files returned by ``findFiles`` for ``downloadPath/downloadedFile``
    (types ``asset``, ``data``, ``item``, ``ecml``) are copied into
    ``downloadPath/identifier``. A file whose path contains ``asset``,
    ``data`` or ``item`` (checked in that order, first match wins) goes into
    the ``assets``, ``data`` or ``items`` sub-folder respectively; any other
    file is copied into the top of the new folder.

    Nothing happens, and ``None`` is returned, unless both ``identifier`` and
    ``downloadPath`` are exactly of type ``unicode`` or ``str``.
    """
    if type(identifier) not in (unicode, str):
        return
    if type(downloadPath) not in (unicode, str):
        return

    # Collect the source files first, before any target folder is created.
    sources = findFiles(
        os.path.join(downloadPath, downloadedFile),
        ['asset', 'data', 'item', 'ecml'],
    )

    # Create the target folder and its three sub-folders if they are missing.
    target_root = os.path.join(downloadPath, identifier)
    if not os.path.exists(target_root):
        os.makedirs(target_root)
    for sub_folder in ('assets', 'data', 'items'):
        sub_path = os.path.join(target_root, sub_folder)
        if not os.path.exists(sub_path):
            os.makedirs(sub_path)

    # (marker searched for in the path, sub-folder receiving the file)
    routing = (('asset', 'assets'), ('data', 'data'), ('item', 'items'))
    for source in sources:
        for marker, sub_folder in routing:
            if marker in source:
                destination = os.path.join(target_root, sub_folder)
                break
        else:
            # No marker matched: the file lands directly in the new folder.
            destination = target_root
        shutil.copy(source, destination)
def imageNames(directory):
    """Return the distinct, normalised names of the image files in *directory*.

    Each path returned by ``findFiles`` for the types ``png``, ``gif`` and
    ``jpg`` is reduced to a lower-case phrase: directory part and extension
    are dropped, underscores become spaces, only runs of ASCII letters are
    kept (so digits and punctuation vanish), and the result is passed
    through ``camel_case_split`` and re-joined with spaces.

    The return value is a list without duplicates; its order is whatever
    ``set`` iteration produces.
    """
    cleaned = []
    for image_path in findFiles(directory, ['png', 'gif', 'jpg']):
        # File name without its directory and without its extension.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        # Underscores act as word separators.
        stem = stem.replace('_', ' ')
        # Keep only alphabetic runs, which filters out numbers.
        stem = ' '.join(re.findall('[a-zA-Z]+', stem))
        # Break camelCase words apart.
        stem = ' '.join(camel_case_split(stem))
        cleaned.append(stem.lower())
    # Going through a set removes identical values.
    return list(set(cleaned))
def count_file_type_directory(directory, typ):
    """Count the files found in *directory* per file type.

    Args:
        directory: Location handed to ``findFiles``.
        typ: Iterable of file-type strings. It is handed to ``findFiles``
            and every entry becomes a key of the result. An entry is only
            ever incremented when it equals the text after the last ``.``
            of a returned path.

    Returns:
        dict mapping each entry of ``typ`` to the number of paths returned
        by ``findFiles`` whose final ``.``-separated component equals that
        entry. Entries with no matching file stay at ``0``.
    """
    counts = dict.fromkeys(typ, 0)
    for path in findFiles(directory, typ):
        suffix = path.split('.')[-1]
        # Some files have an odd ending such as "._oldpng" (seen in
        # org.ekstep.englishsecondlanguage and org.ekstep.esl1); those are
        # deliberately skipped. An explicit membership test replaces the
        # former bare ``except:`` so unrelated errors are no longer hidden.
        if suffix in counts:
            counts[suffix] += 1
    return counts
def unzip_files(directory, file_type=None):
    """Extract every archive found in *directory* and delete it afterwards.

    All archives are extracted into ``directory`` itself, not into the
    folder that holds the archive. An archive is removed only after it has
    been extracted successfully.

    Args:
        directory: Folder to search and to extract into. Must be a ``str``
            or ``unicode`` value; for anything else nothing is done.
        file_type: List of file types handed to ``findFiles``. Defaults to
            ``['.zip']``.

    Returns:
        list of the archive paths that could not be extracted or removed.
        The list is empty when everything succeeded or when ``directory``
        is not a string.
    """
    if file_type is None:
        # Built per call so the default list is never shared between calls.
        file_type = ['.zip']

    # Must be a list: the original created a dict here, so ``.append`` in the
    # handler below raised AttributeError on the first bad archive.
    bugs = []
    if not isinstance(directory, (unicode, str)):
        return bugs

    for zip_file in findFiles(directory, file_type):
        try:
            # NOTE(security): extractall() trusts the member names stored in
            # the archive; only run this on archives from a trusted source.
            with zipfile.ZipFile(zip_file, 'r') as archive:
                archive.extractall(directory)
            # Delete the archive only once extraction has succeeded.
            os.remove(zip_file)
        except Exception:
            # Best effort: remember the bad archive and carry on with the
            # rest. KeyboardInterrupt and SystemExit are no longer swallowed.
            bugs.append(zip_file)
    return bugs