def makecsv(self):
    """Load every file under the `pathname` folder into the datastore.

    Despite the name (and the commented-out CSV line below), this no
    longer writes toupload.csv: each file in the folder becomes one
    `Map` entity directly, keyed by a running counter.

    Because the bulkuploader works only with CSVs, double quotes in the
    file content are replaced with '@' (the dirty inverse is to fetch
    the entity later and restore the original characters).

    Side effects: reads every file in `pathname`, writes one `Map`
    entity per file via `.put()`, and logs a "count:filename" line
    per file.
    """
    filelist = os.listdir(pathname)
    count = 0
    for filename in filelist:
        count = count + 1
        logging.info(str(count) + ":" + filename)
        # `with` guarantees the handle is closed even if reading fails
        # (the original leaked one open file per iteration).
        with open(pathname + "/" + filename) as cur_file:
            content = cur_file.read()
        # Keep only ASCII characters of each word, re-joining with a
        # single leading space per word (matches the original output,
        # which always starts with one space when content is non-empty).
        # "".join(filter(...)) is byte-identical to the old Python 2
        # `filter` on a str, and also correct on Python 3.
        pieces = []
        for word in content.split():
            pieces.append(" " + "".join(filter(self.onlyascii, word)))
        content = "".join(pieces).replace("\"", "@")
        # oneline = str(count)+","+"logs"+","+content.strip().replace(",","@#")+","+filename
        # NOTE(review): the commented-out CSV line used dataset "logs",
        # but the entity is stored under "blogs" — confirm which is intended.
        entity = Map(uniqueid=count,
                     dataset_name="blogs",
                     text=str(content),
                     filename=filename)
        entity.put()
def upload():
    """Read toupload.csv and store one `Map` entity per line.

    Each line is expected to be "uniqueid,dataset_name,text"; the
    filename is hard-coded to "logs.txt".

    NOTE(review): a plain split(",") truncates `text` at its first
    comma — upstream replaced commas with '@#' so this presumably
    holds, but verify against the CSV producer. The trailing newline
    stays attached to the text field, as in the original.
    """
    # `with` closes the file even on error (the original leaked the handle);
    # `fields` avoids shadowing the builtin `list`.
    with open("toupload.csv", "r") as csv_file:
        for line in csv_file:
            fields = line.split(",")
            Map(uniqueid=int(fields[0]),
                dataset_name=fields[1],
                text=fields[2],
                filename="logs.txt").put()