Exemple #1
0
def download_single(path, analysis, children, rec_dict, norecord, *icgc_url):
    """Download a single sample by analysis_id, or link to an existing copy.

    The sample is first checked with ``cgquery_check``.  Its metadata is read
    from ``<path>/<analysis>.info``; only the first record, its first file
    name and its first checksum are used.  When ``analysis`` is already a key
    of ``rec_dict`` the recorded copy and ``<path>/<analysis>/<filename>``
    are collapsed into one real file plus a symbolic link; otherwise the
    sample is fetched with ``gtdownload``.

    Args:
        path: Directory holding the ``.info`` file and the ``<analysis>``
            download directory.
        analysis: The analysis_id of the sample.
        children: Forwarded unchanged to ``gtdownload``.
        rec_dict: Mapping of analysis_id to ``[file_path, checksum]``.
            Updated in place.
        norecord: When equal to 0 the master log ``MASTER`` is updated along
            with ``rec_dict``; any other value leaves the log untouched.
        *icgc_url: Extra positional arguments forwarded to ``cgquery_check``
            and ``gtdownload``.

    Returns:
        None.
    """
    # print(...) with one argument behaves the same on Python 2 and 3.
    # Sample is not downloadable: report it and stop.
    if not cgquery_check(analysis, *icgc_url):
        print("Unable to download " + analysis)
        print("If you want to read from a file, "
              "please make sure the file exists!")
        return

    record = get_info("%s/%s.info" % (path, analysis))[0]
    sample_dir = os.path.join(path, analysis)
    filename = record.files[0]
    checksum = record.sums[0]
    dest_file = os.path.join(sample_dir, filename)

    # Delete an unfinished download directory left by a previous run.
    os.system("rm -fr %s" % os.path.join(path, analysis + ".partial"))

    # The analysis_id is already recorded: link instead of downloading.
    if analysis in rec_dict:
        source_file = rec_dict[analysis][0]
        print(analysis + ' exists, ' + source_file)

        # Compare resolved paths, not raw strings: "./id/f" and "/abs/id/f"
        # (or a link created by an earlier run) are the same file, and
        # emptying its directory would destroy the only copy.
        if os.path.realpath(source_file) != os.path.realpath(dest_file):
            # Absolute paths are used as link targets because a relative
            # target is resolved against the link's own directory.
            source_abs = os.path.abspath(source_file)
            dest_abs = os.path.abspath(dest_file)
            # The directories that contain each copy.  These must be the
            # parent directories (not the file paths themselves) for the
            # "rm <dir>/*" and "mkdir -p <dir>" commands below to work.
            source_dir = os.path.dirname(source_abs)
            dest_dir = os.path.dirname(dest_abs)

            if checksum == rec_dict[analysis][1]:
                # Same checksum: the destination becomes a link to the
                # recorded copy.
                if not os.path.exists(dest_file):
                    os.system("mkdir -p %s" % dest_dir)
                    os.system("ln -s %s %s" % (source_abs, dest_file))
                elif os.path.exists(source_file):
                    os.system("rm -f %s/*" % dest_dir)
                    os.system("ln -s %s %s" % (source_abs, dest_file))
                # Otherwise the recorded copy is missing while the local one
                # exists: keep the local file rather than replacing it with
                # a dangling link.
            elif os.path.exists(dest_file):
                # Checksums differ and both copies are on disk: keep the
                # newer one (larger ctime) and link the older location to it.
                if os.path.getctime(source_file) > os.path.getctime(dest_file):
                    os.system("rm %s/*" % dest_dir)
                    os.system("ln -s %s %s" % (source_abs, dest_file))
                    print(dest_file + ' deleted and link for ' +
                          source_file + " created")
                else:
                    os.system("rm %s/*" % source_dir)
                    os.system("ln -s %s %s" % (dest_abs, source_file))
                    print(source_file + ' deleted and link for ' +
                          dest_file + ' created')
                    rec_dict[analysis][0] = dest_file
                    rec_dict[analysis][1] = checksum
                    if norecord == 0:
                        new_line = analysis + ' ' + dest_file + ' ' + checksum
                        # Replace the old line for analysis_id in master log.
                        print("Updating master log..")
                        replace_line(MASTER, analysis, new_line)

        os.system("rm %s.info" % analysis)
        return

    # Not recorded yet: download the sample.
    # If the directory exists but the file does not, delete the directory.
    if os.path.isdir(sample_dir) and not os.path.exists(dest_file):
        os.system("rm -fr %s" % sample_dir)

    if gtdownload(analysis, children, *icgc_url) == 1:
        print(analysis + " downloaded successfully.")
        # Record the sample only after a successful download; an empty
        # placeholder entry would make a later call for the same
        # analysis_id fail on rec_dict[analysis][0].
        rec_dict[analysis] = [dest_file, checksum]
        if norecord == 0:
            new_line = analysis + ' ' + dest_file + ' ' + checksum
            with open(MASTER, 'a') as log:
                log.write("%s\n" % new_line)
        os.system("mv %s.info %s/" % (analysis, analysis))
    else:
        print("Failed to download " + analysis)
        os.system("rm -f %s.info" % analysis)
    os.system("rm %s.gto" % analysis)
Exemple #2
0
def update_records():
    """Reconcile the master log with sample directories in the cwd.

    Existing records are loaded from ``MASTER`` with ``log2dict`` when that
    file exists; otherwise the record mapping starts empty.  Every
    sub-directory ``<name>`` of the current working directory that contains
    ``<name>.info`` is parsed with ``get_info``.  For each record whose first
    file exists on disk:

    * an unknown analysis_id is added to the mapping and appended to
      ``MASTER``;
    * a known analysis_id stored at a different location is de-duplicated:
      one copy is kept, the other location is emptied and receives a
      symbolic link to the kept copy.  When the checksums differ the copy
      with the larger ctime is kept and, if that is the current file, the
      record and its ``MASTER`` line are updated.

    The numbers of updated and appended lines are printed at the end.

    Returns:
        None.
    """
    # print(...) with one argument behaves the same on Python 2 and 3.
    if os.path.exists(MASTER):
        print("Reading from master log file...")
        records = log2dict(MASTER)
    else:
        print("Creating master log and adding records.")
        records = {}
    modify_ct = 0
    append_ct = 0
    cwd = os.getcwd()

    for entry in os.listdir(cwd):
        if not os.path.isdir(entry):
            continue
        # Only directories that carry their own <name>.info are considered.
        info_file = os.path.join(cwd, entry, "%s.info" % entry)
        if not os.path.exists(info_file):
            continue

        for rec in get_info(info_file):
            analysis_id = rec.analysis
            filename = rec.files[0]
            checksum = rec.sums[0]
            print(analysis_id + ' ' + filename + ' ' + checksum)
            file_path = os.path.join(cwd, entry, filename)
            # Records whose file was never downloaded are ignored.
            if not os.path.exists(file_path):
                continue
            new_line = analysis_id + ' ' + file_path + ' ' + checksum

            # Unknown analysis_id: add the id/file pair and log it.
            if analysis_id not in records:
                records[analysis_id] = [file_path, checksum]
                print("Appending master log..")
                with open(MASTER, 'a') as log:
                    log.write("%s\n" % new_line)
                append_ct += 1
                continue

            known_file = records[analysis_id][0]
            # Compare resolved paths, not raw strings: a differently spelled
            # path or a link created by an earlier run is the same file, and
            # emptying its directory would destroy the only copy.
            if os.path.realpath(known_file) == os.path.realpath(file_path):
                continue

            # Absolute link target: a relative target would be resolved
            # against the link's own directory.
            known_abs = os.path.abspath(known_file)
            # The directories that contain each copy.  These must be the
            # parent directories (not the file paths themselves) for the
            # "rm <dir>/*" commands below to work.
            known_dir = os.path.dirname(known_abs)
            current_dir = os.path.dirname(os.path.abspath(file_path))

            if records[analysis_id][1] == checksum:
                # Duplicate file with the same checksum.  Skipped when the
                # recorded copy is missing, so the only surviving file is
                # never replaced by a dangling link.
                if os.path.exists(known_file):
                    print("Duplicate found at " + known_file)
                    print("Deleting " + file_path + " and creating link")
                    os.system("rm %s/*" % current_dir)
                    os.system("ln -s %s %s" % (known_abs, file_path))
            elif os.path.getctime(known_file) > os.path.getctime(file_path):
                # Checksums differ and the recorded copy has the larger
                # ctime, i.e. the current file is the older one.
                print("Current file is older")
                print("Deleting " + file_path + " and creating link")
                os.system("rm %s/*" % current_dir)
                os.system("ln -s %s %s" % (known_abs, file_path))
            else:
                print("Current file is newer")
                print("Updating dictionary..")
                os.system("rm %s/*" % known_dir)
                os.system("ln -s %s %s" % (file_path, known_file))
                records[analysis_id][0] = file_path
                records[analysis_id][1] = checksum
                # Replace the old line for analysis_id in master log.
                print("Updating master log..")
                replace_line(MASTER, analysis_id, new_line)
                modify_ct += 1

    print(str(modify_ct) + ' lines updated')
    print(str(append_ct) + ' lines added')
Exemple #3
0
def update_records():
    """Reconcile the master log with sample directories in the cwd.

    Existing records are loaded from ``MASTER`` with ``log2dict`` when that
    file exists; otherwise the record mapping starts empty.  Every
    sub-directory ``<name>`` of the current working directory that contains
    ``<name>.info`` is parsed with ``get_info``.  For each record whose first
    file exists on disk:

    * an unknown analysis_id is added to the mapping and appended to
      ``MASTER``;
    * a known analysis_id stored at a different location is de-duplicated:
      one copy is kept, the other location is emptied and receives a
      symbolic link to the kept copy.  When the checksums differ the copy
      with the larger ctime is kept and, if that is the current file, the
      record and its ``MASTER`` line are updated.

    The numbers of updated and appended lines are printed at the end.

    Returns:
        None.
    """
    # print(...) with one argument behaves the same on Python 2 and 3.
    if os.path.exists(MASTER):
        print("Reading from master log file...")
        records = log2dict(MASTER)
    else:
        print("Creating master log and adding records.")
        records = {}
    modify_ct = 0
    append_ct = 0
    cwd = os.getcwd()

    for entry in os.listdir(cwd):
        if not os.path.isdir(entry):
            continue
        # Only directories that carry their own <name>.info are considered.
        info_file = os.path.join(cwd, entry, "%s.info" % entry)
        if not os.path.exists(info_file):
            continue

        for rec in get_info(info_file):
            analysis_id = rec.analysis
            filename = rec.files[0]
            checksum = rec.sums[0]
            print(analysis_id + ' ' + filename + ' ' + checksum)
            file_path = os.path.join(cwd, entry, filename)
            # Records whose file was never downloaded are ignored.
            if not os.path.exists(file_path):
                continue
            new_line = analysis_id + ' ' + file_path + ' ' + checksum

            # Unknown analysis_id: add the id/file pair and log it.
            if analysis_id not in records:
                records[analysis_id] = [file_path, checksum]
                print("Appending master log..")
                with open(MASTER, 'a') as log:
                    log.write("%s\n" % new_line)
                append_ct += 1
                continue

            known_file = records[analysis_id][0]
            # Compare resolved paths, not raw strings: a differently spelled
            # path or a link created by an earlier run is the same file, and
            # emptying its directory would destroy the only copy.
            if os.path.realpath(known_file) == os.path.realpath(file_path):
                continue

            # Absolute link target: a relative target would be resolved
            # against the link's own directory.
            known_abs = os.path.abspath(known_file)
            # The directories that contain each copy.  These must be the
            # parent directories (not the file paths themselves) for the
            # "rm <dir>/*" commands below to work.
            known_dir = os.path.dirname(known_abs)
            current_dir = os.path.dirname(os.path.abspath(file_path))

            if records[analysis_id][1] == checksum:
                # Duplicate file with the same checksum.  Skipped when the
                # recorded copy is missing, so the only surviving file is
                # never replaced by a dangling link.
                if os.path.exists(known_file):
                    print("Duplicate found at " + known_file)
                    print("Deleting " + file_path + " and creating link")
                    os.system("rm %s/*" % current_dir)
                    os.system("ln -s %s %s" % (known_abs, file_path))
            elif os.path.getctime(known_file) > os.path.getctime(file_path):
                # Checksums differ and the recorded copy has the larger
                # ctime, i.e. the current file is the older one.
                print("Current file is older")
                print("Deleting " + file_path + " and creating link")
                os.system("rm %s/*" % current_dir)
                os.system("ln -s %s %s" % (known_abs, file_path))
            else:
                print("Current file is newer")
                print("Updating dictionary..")
                os.system("rm %s/*" % known_dir)
                os.system("ln -s %s %s" % (file_path, known_file))
                records[analysis_id][0] = file_path
                records[analysis_id][1] = checksum
                # Replace the old line for analysis_id in master log.
                print("Updating master log..")
                replace_line(MASTER, analysis_id, new_line)
                modify_ct += 1

    print(str(modify_ct) + ' lines updated')
    print(str(append_ct) + ' lines added')
Exemple #4
0
def download_single(path, analysis, children, rec_dict, norecord, *icgc_url):
    """Download a single sample by analysis_id, or link to an existing copy.

    The sample is first checked with ``cgquery_check``.  Its metadata is read
    from ``<path>/<analysis>.info``; only the first record, its first file
    name and its first checksum are used.  When ``analysis`` is already a key
    of ``rec_dict`` the recorded copy and ``<path>/<analysis>/<filename>``
    are collapsed into one real file plus a symbolic link; otherwise the
    sample is fetched with ``gtdownload``.

    Args:
        path: Directory holding the ``.info`` file and the ``<analysis>``
            download directory.
        analysis: The analysis_id of the sample.
        children: Forwarded unchanged to ``gtdownload``.
        rec_dict: Mapping of analysis_id to ``[file_path, checksum]``.
            Updated in place.
        norecord: When equal to 0 the master log ``MASTER`` is updated along
            with ``rec_dict``; any other value leaves the log untouched.
        *icgc_url: Extra positional arguments forwarded to ``cgquery_check``
            and ``gtdownload``.

    Returns:
        None.
    """
    # print(...) with one argument behaves the same on Python 2 and 3.
    # Sample is not downloadable: report it and stop.
    if not cgquery_check(analysis, *icgc_url):
        print("Unable to download " + analysis)
        print("If you want to read from a file, "
              "please make sure the file exists!")
        return

    record = get_info("%s/%s.info" % (path, analysis))[0]
    sample_dir = os.path.join(path, analysis)
    filename = record.files[0]
    checksum = record.sums[0]
    dest_file = os.path.join(sample_dir, filename)

    # Delete an unfinished download directory left by a previous run.
    os.system("rm -fr %s" % os.path.join(path, analysis + ".partial"))

    # The analysis_id is already recorded: link instead of downloading.
    if analysis in rec_dict:
        source_file = rec_dict[analysis][0]
        print(analysis + ' exists, ' + source_file)

        # Compare resolved paths, not raw strings: "./id/f" and "/abs/id/f"
        # (or a link created by an earlier run) are the same file, and
        # emptying its directory would destroy the only copy.
        if os.path.realpath(source_file) != os.path.realpath(dest_file):
            # Absolute paths are used as link targets because a relative
            # target is resolved against the link's own directory.
            source_abs = os.path.abspath(source_file)
            dest_abs = os.path.abspath(dest_file)
            # The directories that contain each copy.  These must be the
            # parent directories (not the file paths themselves) for the
            # "rm <dir>/*" and "mkdir -p <dir>" commands below to work.
            source_dir = os.path.dirname(source_abs)
            dest_dir = os.path.dirname(dest_abs)

            if checksum == rec_dict[analysis][1]:
                # Same checksum: the destination becomes a link to the
                # recorded copy.
                if not os.path.exists(dest_file):
                    os.system("mkdir -p %s" % dest_dir)
                    os.system("ln -s %s %s" % (source_abs, dest_file))
                elif os.path.exists(source_file):
                    os.system("rm -f %s/*" % dest_dir)
                    os.system("ln -s %s %s" % (source_abs, dest_file))
                # Otherwise the recorded copy is missing while the local one
                # exists: keep the local file rather than replacing it with
                # a dangling link.
            elif os.path.exists(dest_file):
                # Checksums differ and both copies are on disk: keep the
                # newer one (larger ctime) and link the older location to it.
                if os.path.getctime(source_file) > os.path.getctime(dest_file):
                    os.system("rm %s/*" % dest_dir)
                    os.system("ln -s %s %s" % (source_abs, dest_file))
                    print(dest_file + ' deleted and link for ' +
                          source_file + " created")
                else:
                    os.system("rm %s/*" % source_dir)
                    os.system("ln -s %s %s" % (dest_abs, source_file))
                    print(source_file + ' deleted and link for ' +
                          dest_file + ' created')
                    rec_dict[analysis][0] = dest_file
                    rec_dict[analysis][1] = checksum
                    if norecord == 0:
                        new_line = analysis + ' ' + dest_file + ' ' + checksum
                        # Replace the old line for analysis_id in master log.
                        print("Updating master log..")
                        replace_line(MASTER, analysis, new_line)

        os.system("rm %s.info" % analysis)
        return

    # Not recorded yet: download the sample.
    # If the directory exists but the file does not, delete the directory.
    if os.path.isdir(sample_dir) and not os.path.exists(dest_file):
        os.system("rm -fr %s" % sample_dir)

    if gtdownload(analysis, children, *icgc_url) == 1:
        print(analysis + " downloaded successfully.")
        # Record the sample only after a successful download; an empty
        # placeholder entry would make a later call for the same
        # analysis_id fail on rec_dict[analysis][0].
        rec_dict[analysis] = [dest_file, checksum]
        if norecord == 0:
            new_line = analysis + ' ' + dest_file + ' ' + checksum
            with open(MASTER, 'a') as log:
                log.write("%s\n" % new_line)
        os.system("mv %s.info %s/" % (analysis, analysis))
    else:
        print("Failed to download " + analysis)
        os.system("rm -f %s.info" % analysis)
    os.system("rm %s.gto" % analysis)