def copy_main_folders(root, identifier):
    assert type(identifier) == unicode or type(identifier) == str
    assert type(root) == unicode or type(root) == str
    # List of files to be copied (to flatten the directory structure)
    file_list = findFiles(os.path.join(root, 'temp' + identifier),
                          ['asset', 'data', 'item', 'ecml'])
    path = os.path.join(root, identifier)
    # Make the new directory in which files will eventually be stored
    if not os.path.exists(path):
        os.makedirs(path)
    # Make the new sub-directories in which the files will eventually be stored
    location = [os.path.join(path, folder) for folder in ['assets', 'data', 'items']]
    for loc in location:
        if not os.path.exists(loc):
            os.makedirs(loc)
    # Copy files into the matching sub-directory
    for f in file_list:
        if f.find('asset') >= 0:
            shutil.copy(f, os.path.join(path, 'assets'))
        elif f.find('data') >= 0:
            shutil.copy(f, os.path.join(path, 'data'))
        elif f.find('item') >= 0:
            shutil.copy(f, os.path.join(path, 'items'))
        else:
            shutil.copy(f, path)
    # Delete the messy download directory
    shutil.rmtree(os.path.join(root, 'temp' + identifier))
def imageNames(directory):
    image_names = findFiles(directory, ['png', 'gif', 'jpg'])
    image_names = [os.path.basename(image) for image in image_names]  # Get filename from path
    image_names = [os.path.splitext(image)[0] for image in image_names]  # Get filename without file type
    # image_names = [image[:-4] for image in image_names]  # Possibly better since it can handle files with '.' in their name
    image_names = [' '.join(image.split('_')) for image in image_names]  # Replace underscores ('_') with spaces
    image_names = [' '.join(re.findall('[a-zA-Z]+', image)) for image in image_names]  # Filter out numbers
    image_names = [' '.join(camel_case_split(image)) for image in image_names]  # Split camel case
    image_names = [image.lower() for image in image_names]  # Turn all text to lower case
    return list(set(image_names))  # list(set(.)) removes identical values, if any
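# camel_case_split is used above but not defined in this snippet. A minimal
# sketch under the assumption that it splits a word on upper-case boundaries;
# the project's actual helper may differ.
def camel_case_split(text):
    # Hypothetical implementation: 'camelCaseName' -> ['camel', 'Case', 'Name'],
    # 'ABCDef' -> ['ABC', 'Def']
    return re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?![a-z])', text)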
def count_file_type_directory(directory, typ):
    x = {}
    for i in typ:
        x[i] = 0
    file_list = findFiles(directory, typ)
    for fl in file_list:
        try:
            x[fl.split('.')[-1]] += 1
        except KeyError:
            # In case the filename has a weird end type like ._oldpng
            # (seen in org.ekstep.englishsecondlanguage and org.ekstep.esl1)
            pass
    return x
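# A minimal usage sketch for the counter above; the wrapper name, path and
# extension list are illustrative, not taken from the project.
def report_asset_counts(identifier_path):
    counts = count_file_type_directory(identifier_path, ['png', 'gif', 'jpg', 'mp3', 'json'])
    for extension, count in counts.items():
        print('%s: %d' % (extension, count))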
def unzip_files(directory):
    assert type(directory) == unicode or type(directory) == str
    # Find all files in the directory that are of type .zip
    zip_list = findFiles(directory, ['.zip'])
    bugs = []
    for zip_file in zip_list:
        # In case the zip file is bad
        try:
            # Extract the zip file
            with zipfile.ZipFile(zip_file, 'r') as z:
                z.extractall(directory)
            # Delete the zip file after extraction
            os.remove(zip_file)
        except:
            # Can return bugs if you want the list of buggy zip files
            bugs.append(zip_file)
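# A minimal sketch of how the two clean-up helpers above fit together, assuming
# a content bundle was already downloaded into root/temp<identifier>; the
# wrapper name is hypothetical.
def flatten_downloaded_content(root, identifier):
    # Extract every zip inside the temp download directory, then flatten it
    unzip_files(os.path.join(root, 'temp' + identifier))
    copy_main_folders(root, identifier)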
        if item_number != '':
            data = (int(item_number), '\n'.join(json_dictionary[key]))
            if filename in processed:
                processed[filename].append(data)
            else:
                processed[filename] = [data]
    for k in processed.keys():
        processed[k] = sorted(processed[k], key=itemgetter(0))
        processed[k] = '\n'.join([unicode(item[1]) for item in processed[k]])
    return processed


if not os.path.isdir(corpus_dir):
    os.makedirs(corpus_dir)
jsonFiles = findFiles.findFiles(json_dir, ['.json'])
for identifier_path in jsonFiles:
    max_tag_length = 5
    path = os.path.join(corpus_dir, identifier_path.split('/')[-1][:-5])
    if not os.path.isdir(path):
        os.makedirs(path)
    with codecs.open(identifier_path, 'r', encoding='utf-8') as f:
        data = json.load(f, encoding='utf-8')
    f.close()
    tags = [concept for concept in data['concepts']]
    # Data
    x = set()
    data_list = json.loads(''.join(data['data']), encoding='utf-8')
    for key in data_list.keys():
        x.add(''.join(
            process_data(getLowestKeyValue.flattenDict(
def train_model_pvdbow(directory):
    doc = load_documents(findFiles(directory, ['tag']), "en")
    # Apply PV-DBOW (dm=0)
    model = gs.models.doc2vec.Doc2Vec(doc, size=50, min_count=3, window=8,
                                      negative=10, workers=4, sample=1e-5, dm=0)
    return model
def train_model_pvdm(directory, language):  # en-English, id-Hindi
    doc = load_documents(findFiles(directory, ['%s-text' % (language)]), language)
    # Apply PV-DM (the Doc2Vec default)
    model = gs.models.doc2vec.Doc2Vec(doc, size=50, min_count=3, window=8,
                                      negative=10, workers=4, sample=1e-5)
    return model
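# A minimal usage sketch, assuming the corpus layout expected by the two
# trainers above and a pre-4.0 gensim (where Doc2Vec still accepts `size`).
# The wrapper name and output filenames are illustrative, not taken from the
# project.
def train_and_save_models(corpus_dir):
    pvdbow_model = train_model_pvdbow(corpus_dir)       # tag-based PV-DBOW model
    pvdm_model = train_model_pvdm(corpus_dir, 'en')     # English text PV-DM model
    pvdbow_model.save(os.path.join(corpus_dir, 'pvdbow.d2v'))
    pvdm_model.save(os.path.join(corpus_dir, 'pvdm-en.d2v'))
    return pvdbow_model, pvdm_model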
tasklist = ["BuildEopEta", "ComputeIC"] additional_options = "" if options.EE: additional_options += " --EE " else: print( "setting up barrel calibration, if you want endcap calibration add the option --EE" ) #create outdir os.system("mkdir -p " + str(options.outdir)) #get ntuples for the calibration selected_filelist, extracalibtree_filelist = findFiles.findFiles( ntuple_dir, "unmerged", tag_list, ignored_ntuples_label_list) if (len(selected_filelist) > 0): print print("Run calibration on " + str(len(selected_filelist)) + " files:") if (options.verbosity >= 1): print("-----------------------") for filename in selected_filelist: print filename print("-----------------------") print("auto-generated extraCalibTree filelist") for filename in extracalibtree_filelist: print filename print("-----------------------") else:
import findFiles

parser = argparse.ArgumentParser()
parser.add_argument('--ld', help='This is the operating directory',
                    default=os.path.join(root, 'Data'))
args = parser.parse_args()
op_dir = args.ld
if not os.path.exists(op_dir):
    os.makedirs(op_dir)

r = requests.get(
    'http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/content/list'
).json()
total_identifiers = [obj['identifier'] for obj in r['result']['contents']]

file_list = findFiles.findFiles(op_dir, ['.json'])
present_identifiers = [identifier[:-5].split('/')[-1] for identifier in file_list]
absent_identifiers = [identifier for identifier in total_identifiers
                      if identifier not in present_identifiers]

root = os.path.dirname(os.path.abspath(__file__))
for response in r['result']['contents']:
    try:
        if (response['identifier'] not in absent_identifiers
                or response['identifier'] == 'test.org.ekstep.beta-mp3'):
            continue
        subprocess.call([
            'python content2EnrichedJson.py \'http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/content\' \'%s\''