def download(self): if not os.path.exists(self.contents_file): print "Error: arXiv contents file %s does not exist" % ( self.contents_file) sys.exit(1) # Change directory to source folder os.chdir(self.filedir) print "Press 'x' to break after the current download." while True: arxiv_file_line = fq.get(self.contents_file) if arxiv_file_line == None: break print "Processing ", arxiv_file_line return_code = call([ self.s3_cmd_ex, 'get', '--add-header=x-amz-request-payer: requester', '--skip-existing', arxiv_file_line ]) if return_code != 0: print "Error downloading", arxiv_file_line break fq.pop(self.contents_file) # break if x was pressed if 'x' in nbRawInput('', timeout=1): print "Download suspended. Restart script to resume." break # Change directory to project current folder os.chdir(self.current_dir)
def download(self): if not os.path.exists(self.contents_file): print "Error: arXiv contents file %s does not exist" % (self.contents_file) sys.exit(1) # Change directory to source folder os.chdir(self.filedir) print "Press 'x' to break after the current download." while True: arxiv_file_line = fq.get(self.contents_file) if arxiv_file_line == None: break print "Processing ", arxiv_file_line return_code = call([self.s3_cmd_ex,'get','--add-header=x-amz-request-payer: requester','--skip-existing', arxiv_file_line]) if return_code != 0: print "Error downloading", arxiv_file_line break fq.pop(self.contents_file) # break if x was pressed if 'x' in nbRawInput('',timeout=1): print "Download suspended. Restart script to resume." break # Change directory to project current folder os.chdir(self.current_dir)
def main(): print 'Press "x" to break' if not os.path.exists(tmp_dir): os.mkdir(tmp_dir) if not os.path.exists(extract_dir): os.mkdir(extract_dir) if not os.path.exists(extraction_queue) or not RESUME: call('find {source_dir} -type f > {target_file}'.format( source_dir = bucket_dir, target_file = extraction_queue ) , shell = True) while True: file_name = fq.get(extraction_queue) if file_name is None: break print "Extracting bucket" , file_name if call(['tar','xf',file_name,'-C',tmp_dir]): # call returns 1 on error. break if call('find %s -name *.gz -type f -exec mv {} %s \;' % (tmp_dir, extract_dir), shell = True): break if call('rm -R ' + tmp_dir + '*', shell=True): break fq.pop(extraction_queue) # break if x was pressed if nbRawInput('',timeout=1) == 'x': print "Extraction suspended. Restart script to resume." break
def main(): print 'Press "x" to break' if not os.path.exists(tmp_dir): os.mkdir(tmp_dir) if not os.path.exists(extract_dir): os.mkdir(extract_dir) if not os.path.exists(extraction_queue) or not RESUME: call('find {source_dir} -type f > {target_file}'.format( source_dir=bucket_dir, target_file=extraction_queue), shell=True) while True: file_name = fq.get(extraction_queue) if file_name is None: break print "Extracting bucket", file_name if call(['tar', 'xf', file_name, '-C', tmp_dir]): # call returns 1 on error. break if call('find %s -name *.gz -type f -exec mv {} %s \;' % (tmp_dir, extract_dir), shell=True): break if call('rm -R ' + tmp_dir + '*', shell=True): break fq.pop(extraction_queue) # break if x was pressed if nbRawInput('', timeout=1) == 'x': print "Extraction suspended. Restart script to resume." break
def extract(self): print "Press 'x' to interupt the extraction process" if not os.path.exists(self.tmp_dir): os.mkdir(self.tmp_dir) if not os.path.exists(self.extract_dir): os.mkdir(self.extract_dir) #Creates arXiv_extraction_queue.txt if it doesn't exist by finding all the tar files in the download folder if not os.path.exists(self.extraction_queue): call('find {source_dir}*.tar -type f > {target_file}'.format( source_dir=self.filedir, target_file=self.extraction_queue), shell=True) while True: file_name = fq.get(self.extraction_queue) if file_name is None: break print "Extracting bucket", file_name if call(['tar', 'xf', file_name, '-C', self.tmp_dir]): # call returns 1 on error. break if call('find %s -name *.gz -type f -exec mv {} %s \;' % (self.tmp_dir, self.extract_dir), shell=True): break if call('rm -R ' + self.tmp_dir + '*', shell=True): break fq.pop(self.extraction_queue) # break if x was pressed if nbRawInput('', timeout=1) == 'x': print "Extraction suspended. Restart script to resume." break
def extract(self): print "Press 'x' to interupt the extraction process" if not os.path.exists(self.tmp_dir): os.mkdir(self.tmp_dir) if not os.path.exists(self.extract_dir): os.mkdir(self.extract_dir) #Creates arXiv_extraction_queue.txt if it doesn't exist by finding all the tar files in the download folder if not os.path.exists(self.extraction_queue): call('find {source_dir}*.tar -type f > {target_file}'.format( source_dir = self.filedir, target_file = self.extraction_queue ) , shell = True) while True: file_name = fq.get(self.extraction_queue) if file_name is None: break print "Extracting bucket" , file_name if call(['tar','xf',file_name,'-C',self.tmp_dir]): # call returns 1 on error. break if call('find %s -name *.gz -type f -exec mv {} %s \;' % (self.tmp_dir, self.extract_dir), shell = True): break if call('rm -R ' + self.tmp_dir + '*', shell=True): break fq.pop(self.extraction_queue) # break if x was pressed if nbRawInput('',timeout=1) == 'x': print "Extraction suspended. Restart script to resume." break
cur_dir = os.getcwd() contents_file = cur_dir + '/s3_contents.txt' s3_cmd_ex = cur_dir + "/../tools/s3cmd/s3cmd" dl_dir = cur_dir + '/../DATA/BUCKETS/' if not os.path.exists(dl_dir): os.makedirs(dl_dir) os.chdir(dl_dir) print "Press 'x' to suspend after the current download." while True: line = fq.get(contents_file) if line == None: break print "Processing ", line return_code = call([s3_cmd_ex,'get','--add-header=x-amz-request-payer: requester','--skip-existing',line]) if return_code != 0: print "ERROR downloading", line break fq.pop(contents_file) # break if x was pressed if 'x' in nbRawInput('',timeout=1): print "Download suspended. Restart script to resume." break
s3_cmd_ex = cur_dir + "/../tools/s3cmd/s3cmd" dl_dir = cur_dir + '/../DATA/BUCKETS/' if not os.path.exists(dl_dir): os.makedirs(dl_dir) os.chdir(dl_dir) print "Press 'x' to suspend after the current download." while True: line = fq.get(contents_file) if line == None: break print "Processing ", line return_code = call([ s3_cmd_ex, 'get', '--add-header=x-amz-request-payer: requester', '--skip-existing', line ]) if return_code != 0: print "ERROR downloading", line break fq.pop(contents_file) # break if x was pressed if 'x' in nbRawInput('', timeout=1): print "Download suspended. Restart script to resume." break