def _replace_with_symlink(target, link_path):
    """Replace ``link_path`` with a symbolic link pointing at ``target``.

    Only the single path ``link_path`` is removed; other files in its
    directory are left alone. The parent directory of ``link_path`` is
    created when it is missing.

    Returns True when the link was created and False when nothing was
    linked (both names already resolve to the same file, the target is
    missing, or the filesystem operation failed).
    """
    # Both names may already resolve to one file (e.g. link_path is a link
    # left by an earlier run); unlinking it would destroy the only copy.
    if os.path.realpath(target) == os.path.realpath(link_path):
        return False
    # Never trade an existing file for a dangling link.
    if not os.path.exists(target):
        print("Link target " + target + " does not exist, keeping " + link_path)
        return False
    link_dir = os.path.dirname(os.path.abspath(link_path))
    try:
        if not os.path.isdir(link_dir):
            os.makedirs(link_dir)
        if os.path.lexists(link_path):
            os.remove(link_path)
        # Absolute target so the link stays valid from any directory.
        os.symlink(os.path.abspath(target), link_path)
    except OSError as err:
        print("Could not link " + link_path + " to " + target + ": " + str(err))
        return False
    return True


def download_single(path, analysis, children, rec_dict, norecord, *icgc_url):
    """Download a single sample by analysis_id, or link to an existing copy.

    Parameters:
        path      -- directory holding ``<analysis>.info`` and the sample dir.
        analysis  -- analysis_id of the sample.
        children  -- passed through unchanged to ``gtdownload``.
        rec_dict  -- maps analysis_id to ``[file_path, checksum]``; updated
                     in place.
        norecord  -- the master log (``MASTER``) is only written when this
                     equals 0.
        icgc_url  -- extra positional arguments forwarded to
                     ``cgquery_check`` and ``gtdownload``.

    When ``cgquery_check`` returns a falsy value only a message is printed.
    When the analysis_id is already in ``rec_dict`` no download happens;
    instead the target file is linked to the recorded file (or the other way
    round, see below). Otherwise ``gtdownload`` is called and a return value
    of 1 is treated as success.

    Returns None.
    """
    # Sample is not downloadable: report and stop.
    if not cgquery_check(analysis, *icgc_url):
        print("Unable to download " + analysis)
        print("If you want to read from a file, please make sure the file exists!")
        return

    record = get_info("%s/%s.info" % (path, analysis))[0]
    sample_dir = os.path.join(path, analysis)
    filename = record.files[0]
    checksum = record.sums[0]
    dest_file = os.path.join(sample_dir, filename)

    # Delete unfinished download dirs from a previous run.
    partial = os.path.join(path, analysis + ".partial")
    os.system("rm -fr %s" % partial)

    if analysis in rec_dict:
        # The analysis_id is in the master log: link instead of downloading.
        source_file = rec_dict[analysis][0]
        print(analysis + ' exists, ' + source_file)
        # Compare resolved paths so a link created by an earlier run (or a
        # differently spelled path to the same file) counts as "the same".
        if os.path.realpath(source_file) != os.path.realpath(dest_file):
            if checksum == rec_dict[analysis][1]:
                # Same checksum: the target becomes a link to the recorded file.
                _replace_with_symlink(source_file, dest_file)
            elif os.path.exists(dest_file):
                # Checksums differ and both files exist: keep one, link the other.
                # NOTE(review): the recorded file is kept when its ctime is the
                # EARLIER one, although the original comment said "delete older
                # file" - confirm which direction is intended.
                if os.path.getctime(source_file) < os.path.getctime(dest_file):
                    if _replace_with_symlink(source_file, dest_file):
                        print(dest_file + ' deleted and link for ' +
                              source_file + " created")
                else:
                    if _replace_with_symlink(dest_file, source_file):
                        print(source_file + ' deleted and link for ' +
                              dest_file + ' created')
                    rec_dict[analysis][0] = dest_file
                    rec_dict[analysis][1] = checksum
                    if norecord == 0:
                        new_line = analysis + ' ' + dest_file + ' ' + checksum
                        # Replace the old line for this analysis_id in the master log.
                        print("Updating master log..")
                        replace_line(MASTER, analysis, new_line)
        # Nothing was downloaded, so the .info file is not needed any more.
        os.system("rm %s.info" % analysis)
    else:
        # Not recorded yet: download the files.
        rec_dict.setdefault(analysis, [])
        # If the directory exists but the file does not, delete the directory.
        if os.path.isdir(sample_dir) and not os.path.exists(dest_file):
            os.system("rm -fr %s" % sample_dir)
        if gtdownload(analysis, children, *icgc_url) == 1:
            print(analysis + " downloaded successfully.")
            # Update the record dictionary and the master log.
            rec_dict[analysis].append(dest_file)
            rec_dict[analysis].append(checksum)
            if norecord == 0:
                new_line = analysis + ' ' + dest_file + ' ' + checksum
                with open(MASTER, 'a') as log:
                    log.write("%s\n" % new_line)
            os.system("mv %s.info %s/" % (analysis, analysis))
        else:
            print("Failed to download " + analysis)
            os.system("rm -f %s.info" % analysis)
        os.system("rm %s.gto" % analysis)
def _replace_with_symlink(target, link_path):
    """Replace ``link_path`` with a symbolic link pointing at ``target``.

    Only the single path ``link_path`` is removed; other files in its
    directory are left alone. The parent directory of ``link_path`` is
    created when it is missing.

    Returns True when the link was created and False when nothing was
    linked (both names already resolve to the same file, the target is
    missing, or the filesystem operation failed).
    """
    # Both names may already resolve to one file (e.g. link_path is a link
    # left by an earlier run); unlinking it would destroy the only copy.
    if os.path.realpath(target) == os.path.realpath(link_path):
        return False
    # Never trade an existing file for a dangling link.
    if not os.path.exists(target):
        print("Link target " + target + " does not exist, keeping " + link_path)
        return False
    link_dir = os.path.dirname(os.path.abspath(link_path))
    try:
        if not os.path.isdir(link_dir):
            os.makedirs(link_dir)
        if os.path.lexists(link_path):
            os.remove(link_path)
        # Absolute target so the link stays valid from any directory.
        os.symlink(os.path.abspath(target), link_path)
    except OSError as err:
        print("Could not link " + link_path + " to " + target + ": " + str(err))
        return False
    return True


def update_records():
    """Synchronise the master log with the sample directories in the cwd.

    Records are loaded from ``MASTER`` via ``log2dict`` when that file
    exists, otherwise an empty mapping is used. Every sub-directory ``d`` of
    the current working directory that contains ``d/d.info`` is read with
    ``get_info``; for each record whose first file exists on disk:

    * unknown analysis_id: the id/file/checksum line is appended to
      ``MASTER``;
    * known analysis_id at a different location: one copy is replaced by a
      symbolic link to the other, and the master log line is rewritten with
      ``replace_line`` when the recorded location changes.

    Prints the number of updated and appended lines. Returns None.
    """
    if os.path.exists(MASTER):
        print("Reading from master log file...")
        records = log2dict(MASTER)
    else:
        print("Creating master log and adding records.")
        records = {}

    modify_ct = 0
    append_ct = 0
    cwd = os.getcwd()
    for entry in os.listdir(cwd):
        if not os.path.isdir(entry):
            continue
        # Only directories carrying their own <name>/<name>.info are samples.
        info_file = os.path.join(cwd, entry, "%s.info" % entry)
        if not os.path.exists(info_file):
            continue
        for rec in get_info(info_file):
            analysis_id = rec.analysis
            filename = rec.files[0]
            checksum = rec.sums[0]
            print(analysis_id + ' ' + filename + ' ' + checksum)
            file_path = os.path.join(cwd, entry, filename)
            # Records whose file is not on disk are ignored entirely.
            if not os.path.exists(file_path):
                continue

            if analysis_id not in records:
                # Files are downloaded but not recorded yet: add them.
                records[analysis_id] = [file_path, checksum]
                new_line = analysis_id + ' ' + file_path + ' ' + checksum
                print("Appending master log..")
                with open(MASTER, 'a') as log:
                    log.write("%s\n" % new_line)
                append_ct += 1
                continue

            recorded_path = records[analysis_id][0]
            # Compare resolved paths so a link created by an earlier run (or
            # a differently spelled path to the same file) is left untouched.
            if os.path.realpath(recorded_path) == os.path.realpath(file_path):
                continue

            if records[analysis_id][1] == checksum:
                # Duplicate download with the same checksum: keep the
                # recorded file and link the current one to it.
                print("Duplicate found at " + recorded_path)
                print("Deleting " + file_path + " and creating link")
                _replace_with_symlink(recorded_path, file_path)
            # NOTE(review): the recorded file is kept when its ctime is the
            # EARLIER one, yet the message calls the current file "older" -
            # confirm which direction is intended.
            elif os.path.getctime(recorded_path) < os.path.getctime(file_path):
                print("Current file is older")
                print("Deleting " + file_path + " and creating link")
                _replace_with_symlink(recorded_path, file_path)
            else:
                print("Current file is newer")
                print("Updating dictionary..")
                _replace_with_symlink(file_path, recorded_path)
                records[analysis_id][0] = file_path
                records[analysis_id][1] = checksum
                new_line = analysis_id + ' ' + file_path + ' ' + checksum
                # Replace the old line for this analysis_id in the master log.
                print("Updating master log..")
                replace_line(MASTER, analysis_id, new_line)
                modify_ct += 1

    print(str(modify_ct) + ' lines updated')
    print(str(append_ct) + ' lines added')
def _replace_with_symlink(target, link_path):
    """Replace ``link_path`` with a symbolic link pointing at ``target``.

    Only the single path ``link_path`` is removed; other files in its
    directory are left alone. The parent directory of ``link_path`` is
    created when it is missing.

    Returns True when the link was created and False when nothing was
    linked (both names already resolve to the same file, the target is
    missing, or the filesystem operation failed).
    """
    # Both names may already resolve to one file (e.g. link_path is a link
    # left by an earlier run); unlinking it would destroy the only copy.
    if os.path.realpath(target) == os.path.realpath(link_path):
        return False
    # Never trade an existing file for a dangling link.
    if not os.path.exists(target):
        print("Link target " + target + " does not exist, keeping " + link_path)
        return False
    link_dir = os.path.dirname(os.path.abspath(link_path))
    try:
        if not os.path.isdir(link_dir):
            os.makedirs(link_dir)
        if os.path.lexists(link_path):
            os.remove(link_path)
        # Absolute target so the link stays valid from any directory.
        os.symlink(os.path.abspath(target), link_path)
    except OSError as err:
        print("Could not link " + link_path + " to " + target + ": " + str(err))
        return False
    return True


def update_records():
    """Synchronise the master log with the sample directories in the cwd.

    Records are loaded from ``MASTER`` via ``log2dict`` when that file
    exists, otherwise an empty mapping is used. Every sub-directory ``d`` of
    the current working directory that contains ``d/d.info`` is read with
    ``get_info``; for each record whose first file exists on disk:

    * unknown analysis_id: the id/file/checksum line is appended to
      ``MASTER``;
    * known analysis_id at a different location: one copy is replaced by a
      symbolic link to the other, and the master log line is rewritten with
      ``replace_line`` when the recorded location changes.

    Prints the number of updated and appended lines. Returns None.
    """
    if os.path.exists(MASTER):
        print("Reading from master log file...")
        records = log2dict(MASTER)
    else:
        print("Creating master log and adding records.")
        records = {}

    modify_ct = 0
    append_ct = 0
    cwd = os.getcwd()
    for entry in os.listdir(cwd):
        if not os.path.isdir(entry):
            continue
        # Only directories carrying their own <name>/<name>.info are samples.
        info_file = os.path.join(cwd, entry, "%s.info" % entry)
        if not os.path.exists(info_file):
            continue
        for rec in get_info(info_file):
            analysis_id = rec.analysis
            filename = rec.files[0]
            checksum = rec.sums[0]
            print(analysis_id + ' ' + filename + ' ' + checksum)
            file_path = os.path.join(cwd, entry, filename)
            # Records whose file is not on disk are ignored entirely.
            if not os.path.exists(file_path):
                continue

            if analysis_id not in records:
                # Files are downloaded but not recorded yet: add them.
                records[analysis_id] = [file_path, checksum]
                new_line = analysis_id + ' ' + file_path + ' ' + checksum
                print("Appending master log..")
                with open(MASTER, 'a') as log:
                    log.write("%s\n" % new_line)
                append_ct += 1
                continue

            recorded_path = records[analysis_id][0]
            # Compare resolved paths so a link created by an earlier run (or
            # a differently spelled path to the same file) is left untouched.
            if os.path.realpath(recorded_path) == os.path.realpath(file_path):
                continue

            if records[analysis_id][1] == checksum:
                # Duplicate download with the same checksum: keep the
                # recorded file and link the current one to it.
                print("Duplicate found at " + recorded_path)
                print("Deleting " + file_path + " and creating link")
                _replace_with_symlink(recorded_path, file_path)
            # NOTE(review): the recorded file is kept when its ctime is the
            # EARLIER one, yet the message calls the current file "older" -
            # confirm which direction is intended.
            elif os.path.getctime(recorded_path) < os.path.getctime(file_path):
                print("Current file is older")
                print("Deleting " + file_path + " and creating link")
                _replace_with_symlink(recorded_path, file_path)
            else:
                print("Current file is newer")
                print("Updating dictionary..")
                _replace_with_symlink(file_path, recorded_path)
                records[analysis_id][0] = file_path
                records[analysis_id][1] = checksum
                new_line = analysis_id + ' ' + file_path + ' ' + checksum
                # Replace the old line for this analysis_id in the master log.
                print("Updating master log..")
                replace_line(MASTER, analysis_id, new_line)
                modify_ct += 1

    print(str(modify_ct) + ' lines updated')
    print(str(append_ct) + ' lines added')
def _replace_with_symlink(target, link_path):
    """Replace ``link_path`` with a symbolic link pointing at ``target``.

    Only the single path ``link_path`` is removed; other files in its
    directory are left alone. The parent directory of ``link_path`` is
    created when it is missing.

    Returns True when the link was created and False when nothing was
    linked (both names already resolve to the same file, the target is
    missing, or the filesystem operation failed).
    """
    # Both names may already resolve to one file (e.g. link_path is a link
    # left by an earlier run); unlinking it would destroy the only copy.
    if os.path.realpath(target) == os.path.realpath(link_path):
        return False
    # Never trade an existing file for a dangling link.
    if not os.path.exists(target):
        print("Link target " + target + " does not exist, keeping " + link_path)
        return False
    link_dir = os.path.dirname(os.path.abspath(link_path))
    try:
        if not os.path.isdir(link_dir):
            os.makedirs(link_dir)
        if os.path.lexists(link_path):
            os.remove(link_path)
        # Absolute target so the link stays valid from any directory.
        os.symlink(os.path.abspath(target), link_path)
    except OSError as err:
        print("Could not link " + link_path + " to " + target + ": " + str(err))
        return False
    return True


def download_single(path, analysis, children, rec_dict, norecord, *icgc_url):
    """Download a single sample by analysis_id, or link to an existing copy.

    Parameters:
        path      -- directory holding ``<analysis>.info`` and the sample dir.
        analysis  -- analysis_id of the sample.
        children  -- passed through unchanged to ``gtdownload``.
        rec_dict  -- maps analysis_id to ``[file_path, checksum]``; updated
                     in place.
        norecord  -- the master log (``MASTER``) is only written when this
                     equals 0.
        icgc_url  -- extra positional arguments forwarded to
                     ``cgquery_check`` and ``gtdownload``.

    When ``cgquery_check`` returns a falsy value only a message is printed.
    When the analysis_id is already in ``rec_dict`` no download happens;
    instead the target file is linked to the recorded file (or the other way
    round, see below). Otherwise ``gtdownload`` is called and a return value
    of 1 is treated as success.

    Returns None.
    """
    # Sample is not downloadable: report and stop.
    if not cgquery_check(analysis, *icgc_url):
        print("Unable to download " + analysis)
        print("If you want to read from a file, please make sure the file exists!")
        return

    record = get_info("%s/%s.info" % (path, analysis))[0]
    sample_dir = os.path.join(path, analysis)
    filename = record.files[0]
    checksum = record.sums[0]
    dest_file = os.path.join(sample_dir, filename)

    # Delete unfinished download dirs from a previous run.
    partial = os.path.join(path, analysis + ".partial")
    os.system("rm -fr %s" % partial)

    if analysis in rec_dict:
        # The analysis_id is in the master log: link instead of downloading.
        source_file = rec_dict[analysis][0]
        print(analysis + ' exists, ' + source_file)
        # Compare resolved paths so a link created by an earlier run (or a
        # differently spelled path to the same file) counts as "the same".
        if os.path.realpath(source_file) != os.path.realpath(dest_file):
            if checksum == rec_dict[analysis][1]:
                # Same checksum: the target becomes a link to the recorded file.
                _replace_with_symlink(source_file, dest_file)
            elif os.path.exists(dest_file):
                # Checksums differ and both files exist: keep one, link the other.
                # NOTE(review): the recorded file is kept when its ctime is the
                # EARLIER one, although the original comment said "delete older
                # file" - confirm which direction is intended.
                if os.path.getctime(source_file) < os.path.getctime(dest_file):
                    if _replace_with_symlink(source_file, dest_file):
                        print(dest_file + ' deleted and link for ' +
                              source_file + " created")
                else:
                    if _replace_with_symlink(dest_file, source_file):
                        print(source_file + ' deleted and link for ' +
                              dest_file + ' created')
                    rec_dict[analysis][0] = dest_file
                    rec_dict[analysis][1] = checksum
                    if norecord == 0:
                        new_line = analysis + ' ' + dest_file + ' ' + checksum
                        # Replace the old line for this analysis_id in the master log.
                        print("Updating master log..")
                        replace_line(MASTER, analysis, new_line)
        # Nothing was downloaded, so the .info file is not needed any more.
        os.system("rm %s.info" % analysis)
    else:
        # Not recorded yet: download the files.
        rec_dict.setdefault(analysis, [])
        # If the directory exists but the file does not, delete the directory.
        if os.path.isdir(sample_dir) and not os.path.exists(dest_file):
            os.system("rm -fr %s" % sample_dir)
        if gtdownload(analysis, children, *icgc_url) == 1:
            print(analysis + " downloaded successfully.")
            # Update the record dictionary and the master log.
            rec_dict[analysis].append(dest_file)
            rec_dict[analysis].append(checksum)
            if norecord == 0:
                new_line = analysis + ' ' + dest_file + ' ' + checksum
                with open(MASTER, 'a') as log:
                    log.write("%s\n" % new_line)
            os.system("mv %s.info %s/" % (analysis, analysis))
        else:
            print("Failed to download " + analysis)
            os.system("rm -f %s.info" % analysis)
        os.system("rm %s.gto" % analysis)