def uploadToServer():
    """Upload all pali word definitions to the server datastore via remote api.

    Walks every JSON file under getDictWordsJsonDir() and stores each file's
    content as one PaliWordJsonBlob entity keyed by the filename.  Entities
    are written in batches because a single remote_api call is limited to
    1 MB ("1 MB API limits apply").

    References:
      find the 10 largest file or folder in the directory:
        ls -lS path_to_folder | head -n 10
      http://stackoverflow.com/questions/12522269/bash-how-to-find-the-largest-file-in-a-directory-and-its-subdirectories
    """
    # Batch size keeps each ndb.put_multi() call under the 1 MB remote_api
    # request limit (40 word-definition JSON blobs fit comfortably).
    batch_size = 40
    count = 0
    batch = []
    for dirpath, dirnames, filenames in os.walk(getDictWordsJsonDir()):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            print("uploading %s ..." % path)
            with open(path, "r") as f:
                # Entity id is the full filename, extension included.
                batch.append(PaliWordJsonBlob(id=filename, data=f.read()))
            # >= instead of == so the flush cannot be skipped even if the
            # batch ever grows by more than one entry per iteration.
            if len(batch) >= batch_size:
                print("putting %d records ..." % len(batch))
                ndb.put_multi(batch)
                count += len(batch)
                print("total number uploaded: %d" % count)
                batch = []
    # Flush the final partial batch, if any.
    if batch:
        print("putting %d records ..." % len(batch))
        ndb.put_multi(batch)
        count += len(batch)
        print("total number uploaded: %d" % count)
def uploadToServer():
    """Push every pali word definition JSON file to the server datastore.

    Uses the remote api: each JSON file found under getDictWordsJsonDir()
    becomes one PaliWordJsonBlob entity (keyed by its filename), uploaded in
    batches of 40 so that each call stays within remote_api's 1 MB limit.

    References:
      find the 10 largest file or folder in the directory:
        ls -lS path_to_folder | head -n 10
      http://stackoverflow.com/questions/12522269/bash-how-to-find-the-largest-file-in-a-directory-and-its-subdirectories
    """
    count = 0
    pending = []

    for dirpath, dirnames, filenames in os.walk(getDictWordsJsonDir()):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            print('uploading %s ...' % path)
            with open(path, 'r') as f:
                entity = PaliWordJsonBlob(id=filename, data=f.read())
            pending.append(entity)
            # Remember "1 MB API limits apply" of remote_api
            if len(pending) == 40:
                print('putting %d records ...' % len(pending))
                ndb.put_multi(pending)
                count += len(pending)
                print('total number uploaded: %d' % count)
                pending = []

    # Whatever is left over after the walk is one final, smaller batch.
    if len(pending) > 0:
        print('putting %d records ...' % len(pending))
        ndb.put_multi(pending)
        count += len(pending)
        print('total number uploaded: %d' % count)
#!/usr/bin/env python # -*- coding:utf-8 -*- import os import shutil import urllib from variables import getDictWordsJsonDir from variables import getPrefixWordsHtmlDir if __name__ == '__main__': prefixWords = {} for dirpath, dirnames, filenames in os.walk(getDictWordsJsonDir()): for filename in filenames: word = filename.decode('utf-8') if word[0] in prefixWords: prefixWords[word[0]].append(word) else: prefixWords[word[0]] = [word] if os.path.exists(getPrefixWordsHtmlDir()): shutil.rmtree(getPrefixWordsHtmlDir()) os.makedirs(getPrefixWordsHtmlDir()) else: os.makedirs(getPrefixWordsHtmlDir()) for firstLetter in prefixWords: legalNameOnGAE = urllib.quote( ('%s.html' % firstLetter).encode('utf-8')
#!/usr/bin/env python # -*- coding:utf-8 -*- import os import shutil import urllib from variables import getDictWordsJsonDir from variables import getPrefixWordsHtmlDir if __name__ == '__main__': prefixWords = {} for dirpath, dirnames, filenames in os.walk(getDictWordsJsonDir()): for filename in filenames: word = filename.decode('utf-8') if word[0] in prefixWords: prefixWords[word[0]].append(word) else: prefixWords[word[0]] = [word] if os.path.exists(getPrefixWordsHtmlDir()): shutil.rmtree(getPrefixWordsHtmlDir()) os.makedirs(getPrefixWordsHtmlDir()) else: os.makedirs(getPrefixWordsHtmlDir()) for firstLetter in prefixWords: legalNameOnGAE = urllib.quote( ('%s.html' % firstLetter).encode('utf-8')).replace('%', 'Z') path = os.path.join(getPrefixWordsHtmlDir(), legalNameOnGAE)
# NOTE(review): this chunk begins mid-function — the `else:` branches below
# belong to if/else chains inside processWordCSV whose openings are outside
# this view; indentation here is reconstructed and should be confirmed.
            else:
                # Non-Chinese dictionary: append the definition unchanged.
                data.append([row[2], row[6]])
            with open(path, 'w') as f:
                f.write(json.dumps(data))
        else:
            # create new data file
            if dicIndex[row[2]][0] == 'zh':
                # convert simplified chinese to traditional chinese
                data = [[row[2], jtof(row[6])]]
            else:
                data = [[row[2], row[6]]]
            with open(path, 'w') as f:
                f.write(json.dumps(data))


if __name__ == '__main__':
    # read index of dictionary books
    with open(getDictBooksJsonPath(), 'r') as f:
        dicIndex = json.loads(f.read())

    # Recreate the per-word JSON output directory from scratch.
    if os.path.exists(getDictWordsJsonDir()):
        shutil.rmtree(getDictWordsJsonDir())
        os.makedirs(getDictWordsJsonDir())
    else:
        os.makedirs(getDictWordsJsonDir())

    # Process both word CSV sources into the same output directory.
    processWordCSV(getDictWordsCSV1Path(), dicIndex, getDictWordsJsonDir())
    processWordCSV(getDictWordsCSV2Path(), dicIndex, getDictWordsJsonDir())
# NOTE(review): this chunk begins mid-function — the `else:` branches below
# belong to if/else chains inside processWordCSV whose openings are outside
# this view; indentation here is reconstructed and should be confirmed.
            else:
                # Non-Chinese dictionary: append the definition unchanged.
                data.append([row[2], row[6]])
            with open(path, 'w') as f:
                f.write(json.dumps(data))
        else:
            # create new data file
            if dicIndex[row[2]][0] == 'zh':
                # convert simplified chinese to traditional chinese
                data = [ [row[2], jtof(row[6])] ]
            else:
                data = [ [row[2], row[6]] ]
            with open(path, 'w') as f:
                f.write(json.dumps(data))


if __name__ == '__main__':
    # read index of dictionary books
    with open(getDictBooksJsonPath(), 'r') as f:
        dicIndex = json.loads(f.read())

    # Recreate the per-word JSON output directory from scratch.
    if os.path.exists(getDictWordsJsonDir()):
        shutil.rmtree(getDictWordsJsonDir())
        os.makedirs(getDictWordsJsonDir())
    else:
        os.makedirs(getDictWordsJsonDir())

    # Process both word CSV sources into the same output directory.
    processWordCSV(getDictWordsCSV1Path(), dicIndex, getDictWordsJsonDir())
    processWordCSV(getDictWordsCSV2Path(), dicIndex, getDictWordsJsonDir())