def download(self):
        """Download every entry of the arXiv contents queue with s3cmd.

        Reads entries one at a time from ``self.contents_file`` through
        ``fq`` and fetches each with ``self.s3_cmd_ex get``. An entry is
        popped from the queue only after its download succeeded, so a failed
        or interrupted run can be resumed by restarting the script.

        The loop stops when ``fq.get`` returns ``None`` (presumably: queue
        exhausted -- confirm against ``fq``), when s3cmd exits with a
        non-zero status, or when the user types 'x'.

        Exits the process with status 1 if the contents file does not exist.
        """
        if not os.path.exists(self.contents_file):
            print "Error: arXiv contents file %s does not exist" % (
                self.contents_file)
            sys.exit(1)

        # s3cmd is invoked without a destination argument, so run it from
        # the source folder.
        os.chdir(self.filedir)
        try:
            print "Press 'x' to break after the current download."
            while True:
                arxiv_file_line = fq.get(self.contents_file)
                if arxiv_file_line is None:
                    break

                print "Processing ", arxiv_file_line

                return_code = call([
                    self.s3_cmd_ex, 'get',
                    '--add-header=x-amz-request-payer: requester',
                    '--skip-existing', arxiv_file_line
                ])

                if return_code != 0:
                    # Leave the entry in the queue so it is retried next run.
                    print "Error downloading", arxiv_file_line
                    break

                fq.pop(self.contents_file)
                # break if x was pressed
                if 'x' in nbRawInput('', timeout=1):
                    print "Download suspended. Restart script to resume."
                    break
        finally:
            # Always return to the project folder, even if a download or a
            # queue operation raised.
            os.chdir(self.current_dir)
    def download(self):
        """Fetch each queued arXiv file with s3cmd, resumably.

        Entries are read from ``self.contents_file`` through ``fq``; each is
        downloaded with ``self.s3_cmd_ex get`` and removed from the queue
        only once the download returned exit status 0. Restarting the script
        therefore continues with the first entry that was not completed.

        The loop ends when ``fq.get`` returns ``None`` (presumably an empty
        queue -- confirm against ``fq``), on the first failed download, or
        when the user types 'x'.

        Exits the process with status 1 if the contents file does not exist.
        """
        if not os.path.exists(self.contents_file):
            print "Error: arXiv contents file %s does not exist" % (self.contents_file)
            sys.exit(1)

        # No destination is passed to s3cmd, so work from the source folder.
        os.chdir(self.filedir)
        try:
            print "Press 'x' to break after the current download."
            while True:
                arxiv_file_line = fq.get(self.contents_file)
                if arxiv_file_line is None:
                    break

                print "Processing ", arxiv_file_line

                return_code = call([
                    self.s3_cmd_ex,
                    'get',
                    '--add-header=x-amz-request-payer: requester',
                    '--skip-existing',
                    arxiv_file_line,
                ])

                if return_code != 0:
                    # Entry stays queued so the next run retries it.
                    print "Error downloading", arxiv_file_line
                    break

                fq.pop(self.contents_file)
                # break if x was pressed
                if 'x' in nbRawInput('', timeout=1):
                    print "Download suspended. Restart script to resume."
                    break
        finally:
            # Restore the project folder even when call()/fq raised.
            os.chdir(self.current_dir)
def main():
    print 'Press "x" to break'


    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)

    if not os.path.exists(extract_dir):
        os.mkdir(extract_dir)

    if not os.path.exists(extraction_queue) or not RESUME:
        call('find {source_dir} -type f > {target_file}'.format(
                source_dir = bucket_dir,
                target_file = extraction_queue 
                ) , shell = True)

    while True:
        file_name = fq.get(extraction_queue)
        if file_name is None: break

        print "Extracting bucket" , file_name
        if call(['tar','xf',file_name,'-C',tmp_dir]):
            # call returns 1 on error.
            break

        if call('find %s -name *.gz -type f -exec mv {} %s \;' % (tmp_dir, extract_dir), shell = True):
            break

        if call('rm -R ' + tmp_dir + '*', shell=True):
            break

        fq.pop(extraction_queue)

        # break if x was pressed
        if nbRawInput('',timeout=1) == 'x':
            print "Extraction suspended. Restart script to resume."
            break
def main():
    print 'Press "x" to break'

    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)

    if not os.path.exists(extract_dir):
        os.mkdir(extract_dir)

    if not os.path.exists(extraction_queue) or not RESUME:
        call('find {source_dir} -type f > {target_file}'.format(
            source_dir=bucket_dir, target_file=extraction_queue),
             shell=True)

    while True:
        file_name = fq.get(extraction_queue)
        if file_name is None: break

        print "Extracting bucket", file_name
        if call(['tar', 'xf', file_name, '-C', tmp_dir]):
            # call returns 1 on error.
            break

        if call('find %s -name *.gz -type f -exec mv {} %s \;' %
                (tmp_dir, extract_dir),
                shell=True):
            break

        if call('rm -R ' + tmp_dir + '*', shell=True):
            break

        fq.pop(extraction_queue)

        # break if x was pressed
        if nbRawInput('', timeout=1) == 'x':
            print "Extraction suspended. Restart script to resume."
            break
    def extract(self):
        print "Press 'x' to interupt the extraction process"
        if not os.path.exists(self.tmp_dir):
            os.mkdir(self.tmp_dir)

        if not os.path.exists(self.extract_dir):
            os.mkdir(self.extract_dir)

        #Creates arXiv_extraction_queue.txt if it doesn't exist by finding all the tar files in the download folder
        if not os.path.exists(self.extraction_queue):
            call('find {source_dir}*.tar -type f > {target_file}'.format(
                source_dir=self.filedir, target_file=self.extraction_queue),
                 shell=True)

        while True:
            file_name = fq.get(self.extraction_queue)
            if file_name is None: break

            print "Extracting bucket", file_name
            if call(['tar', 'xf', file_name, '-C', self.tmp_dir]):
                # call returns 1 on error.
                break

            if call('find %s -name *.gz -type f -exec mv {} %s \;' %
                    (self.tmp_dir, self.extract_dir),
                    shell=True):
                break

            if call('rm -R ' + self.tmp_dir + '*', shell=True):
                break

            fq.pop(self.extraction_queue)

            # break if x was pressed
            if nbRawInput('', timeout=1) == 'x':
                print "Extraction suspended. Restart script to resume."
                break
    def extract(self):
        print "Press 'x' to interupt the extraction process"
        if not os.path.exists(self.tmp_dir):
            os.mkdir(self.tmp_dir)

        if not os.path.exists(self.extract_dir):
            os.mkdir(self.extract_dir)

        #Creates arXiv_extraction_queue.txt if it doesn't exist by finding all the tar files in the download folder
        if not os.path.exists(self.extraction_queue):
            call('find {source_dir}*.tar -type f > {target_file}'.format(
                    source_dir = self.filedir,
                    target_file = self.extraction_queue 
                    ) , shell = True)

        while True:
            file_name = fq.get(self.extraction_queue)
            if file_name is None: break

            print "Extracting bucket" , file_name
            if call(['tar','xf',file_name,'-C',self.tmp_dir]):
                # call returns 1 on error.
                break

            if call('find %s -name *.gz -type f -exec mv {} %s \;' % (self.tmp_dir, self.extract_dir), shell = True):
                break

            if call('rm -R ' + self.tmp_dir + '*', shell=True):
                break

            fq.pop(self.extraction_queue)

            # break if x was pressed
            if nbRawInput('',timeout=1) == 'x':
                print "Extraction suspended. Restart script to resume."
                break
cur_dir = os.getcwd()

contents_file = cur_dir + '/s3_contents.txt'
s3_cmd_ex     = cur_dir + "/../tools/s3cmd/s3cmd"
dl_dir        = cur_dir + '/../DATA/BUCKETS/'

if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)

os.chdir(dl_dir)

print "Press 'x' to suspend after the current download."
while True:
    line = fq.get(contents_file)
    if line == None: 
        break

    print "Processing ", line
    
    return_code = call([s3_cmd_ex,'get','--add-header=x-amz-request-payer: requester','--skip-existing',line])

    if return_code != 0:
        print "ERROR downloading", line 
        break

    fq.pop(contents_file)
    # break if x was pressed
    if 'x' in nbRawInput('',timeout=1):
        print "Download suspended. Restart script to resume."
        break
Example #8
0
s3_cmd_ex = cur_dir + "/../tools/s3cmd/s3cmd"
dl_dir = cur_dir + '/../DATA/BUCKETS/'

if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)

os.chdir(dl_dir)

print "Press 'x' to suspend after the current download."
while True:
    line = fq.get(contents_file)
    if line == None:
        break

    print "Processing ", line

    return_code = call([
        s3_cmd_ex, 'get', '--add-header=x-amz-request-payer: requester',
        '--skip-existing', line
    ])

    if return_code != 0:
        print "ERROR downloading", line
        break

    fq.pop(contents_file)
    # break if x was pressed
    if 'x' in nbRawInput('', timeout=1):
        print "Download suspended. Restart script to resume."
        break