Ejemplo n.º 1
0
    def downloadMetaData(self, simulateDownload=False):
        """Fetch the metadata tree, sign it and store it.

        The entry returned by ``self.service.metadata("/")`` is expanded
        with ``self.recurseDropTree`` and kept on ``self.metadata``.  A
        base64-encoded JSON dump of that tree is signed together with the
        current timestamp and saved as a ``FileMetadata`` row.  Finally
        ``self.d`` is marked with ``constConfig.THREAD_DOWN_META`` and
        saved.  Nothing is returned on this path.

        When ``simulateDownload`` is True no download happens: the method
        sleeps for ``constConfig.TEST_THREAD_SLEEP_TIME`` and returns
        ``downStatus``.
        """
        # Test hook: wait for the configured time and leave everything as is.
        # NOTE(review): ``downStatus`` is not bound anywhere in this method;
        # it has to come from an outer scope -- confirm that it exists.
        if simulateDownload is True:
            time.sleep(constConfig.TEST_THREAD_SLEEP_TIME)
            return downStatus

        self.d.downTime = timezone.now()

        # Start at the root folder (deleted entries and media info included)
        # and let recurseDropTree collect the rest.
        # NOTE(review): the 5 is presumably a recursion depth limit -- verify
        # against recurseDropTree.
        rootEntry = self.service.metadata(
            "/", include_deleted=True, include_media_info=True)
        tree = self.recurseDropTree(rootEntry, 5)
        self.metadata = tree

        # The signature covers the encoded metadata plus the time of signing.
        encodedTree = base64.b64encode(json.dumps(tree))
        signedAt = timezone.now()
        payload = encodedTree + crypto.HASH_SEPARATOR + format(signedAt, "U")
        signature = crypto.rsaSignatureSHA256(payload, settings.PRIV_KEY)

        # Persist the signed snapshot.
        FileMetadata(metadata=encodedTree,
                     tokenID=self.t,
                     metaTime=signedAt,
                     metadataHash=signature).save()

        # Record that the metadata phase is done.
        self.d.threadStatus = constConfig.THREAD_DOWN_META
        self.d.save()
def process_license_files(current_file, file_name, file_metadata_dict):
    """Register a license file under one combined ``FileMetadata`` key.

    Every ``spdx_license_key`` found in ``current_file['licenses']`` and
    every holder found in ``current_file['copyrights'][i]['holders']`` is
    de-duplicated, sorted and joined with single spaces.  The two resulting
    strings (holders first, license keys second) build a ``FileMetadata``
    that is used as key in ``file_metadata_dict``; its value is set to
    ``{"N/A": [file_name]}``, replacing any entry already stored under an
    equal key.

    Args:
        current_file: dict describing one scanned file.
        file_name: path of that file, stored in the resulting entry.
        file_metadata_dict: dict that is updated in place.

    A missing or empty 'licenses', 'copyrights' or 'holders' entry is
    treated as "nothing found" and contributes an empty string.
    """
    # Sets drop duplicates; ``or []`` also covers keys that are present
    # but hold None.
    license_keys = {lic['spdx_license_key']
                    for lic in current_file.get('licenses') or []}
    holders = {holder
               for cprt in current_file.get('copyrights') or []
               for holder in cprt.get('holders') or []}

    # Set iteration order is not guaranteed, so sort before joining to get
    # the same key text for the same inputs on every run.
    file_metadata = FileMetadata(" ".join(sorted(holders)),
                                 " ".join(sorted(license_keys)))
    file_metadata_dict[file_metadata] = {"N/A": [file_name]}
Ejemplo n.º 3
0
	def downloadMetaData(self,simulateDownload = False):
		""" Download the metadata of all files, sign it and store it.

		Every page returned by ``self.service.files().list()`` is fetched and
		the ``items`` of all pages are collected in ``self.metadata``. A
		base64-encoded JSON dump of that list is signed together with the
		current timestamp and saved as a ``FileMetadata`` row. Finally
		``self.d`` is marked with ``constConfig.THREAD_DOWN_META`` and saved.
		Nothing is returned on this path.

		When ``simulateDownload`` is True no download happens: the method
		sleeps for ``constConfig.TEST_THREAD_SLEEP_TIME`` and returns
		``downStatus``.
		"""

		#used for tests
		if simulateDownload is True:
			#simulate the download by waiting for the configured test time
			# NOTE(review): ``downStatus`` is not bound anywhere in this
			# method; it has to come from an outer scope -- confirm.
			time.sleep(constConfig.TEST_THREAD_SLEEP_TIME)
			return downStatus

		#download
		self.d.downTime = timezone.now()

		result = []

		# Send the page size with every request, the first one included, so
		# the first page does not fall back to the service default.
		param = {'maxResults': 500}

		while True:
			files = self.service.files().list(**param).execute()
			result.extend(files['items'])

			# No token in the response means this was the last page.
			page_token = files.get('nextPageToken')
			if not page_token:
				break
			param['pageToken'] = page_token

		self.metadata = result

		meta = base64.b64encode(json.dumps(self.metadata))

		metaTime = timezone.now()

		# The signature covers the encoded metadata plus the time of signing.
		txt = meta+crypto.HASH_SEPARATOR+format(metaTime,"U")
		metaHash = crypto.rsaSignatureSHA256(txt,settings.PRIV_KEY)

		storeFM = FileMetadata(metadata=meta,tokenID=self.t,metaTime=metaTime,metadataHash=metaHash)
		storeFM.save()

		# Record that the metadata phase is done.
		self.d.threadStatus = constConfig.THREAD_DOWN_META
		self.d.save()
def _first_copyright_holder(current_file):
    # Return the first holder of the first copyright entry, or "N/A".
    try:
        return current_file['copyrights'][0]['holders'][0]
    except (KeyError, IndexError):
        # Key missing, or one of the two lists is empty.
        return "N/A"


def _spdx_keys_or_na(current_file):
    # Return the license keys a file is filed under; the list is never empty.
    spdx_keys = []
    try:
        for lic in current_file['licenses']:
            spdx_keys.append(lic['spdx_license_key'])
    except KeyError:
        # 'licenses' is missing, or an entry has no 'spdx_license_key':
        # keep the keys read so far and add the "N/A" bucket.
        spdx_keys.append("N/A")
    return spdx_keys or ["N/A"]


def _record_file(file_metadata_dict, file_metadata, file_ext, file_name):
    # Add file_name once to file_metadata_dict[file_metadata][file_ext].
    contained_files = file_metadata_dict.setdefault(
        file_metadata, {}).setdefault(file_ext, [])
    if file_name not in contained_files:
        contained_files.append(file_name)


def process_copyright_and_license_information(scancode_output_file):
    """Group the files of a scan result by copyright holder and license.

    The JSON document at ``scancode_output_file`` must contain a 'files'
    list whose entries provide 'path' and 'extension'.

    * A file whose base name starts with "license" (case-insensitive) is
      passed to ``process_license_files``.
    * Any other file is recorded once per entry of its 'licenses' list
      under ``FileMetadata(holder, spdx_license_key)``.  ``holder`` is the
      first holder of the first 'copyrights' entry.  "N/A" is used for a
      missing holder, for missing or empty licenses and for an empty
      extension.

    Args:
        scancode_output_file: path of the JSON file to read.

    Returns:
        dict mapping each ``FileMetadata`` to a dict that maps a file
        extension to the list of file paths recorded for it.
    """
    # The context manager closes the file even when parsing fails.
    with open(scancode_output_file, "r") as f:
        parsed_json = json.load(f)

    file_metadata_dict = {}

    for current_file in parsed_json['files']:
        file_name = current_file['path']
        file_ext = current_file['extension']

        base_name = str(file_name).split("/")[-1]
        if re.match("license", base_name, re.IGNORECASE):
            process_license_files(current_file, file_name,
                                  file_metadata_dict)
            continue

        if not file_ext:
            file_ext = "N/A"

        # Only the first copyright holder is used, so that a file yields
        # one holder + license combination per license.
        cprt_holder = _first_copyright_holder(current_file)
        for spdx_key in _spdx_keys_or_na(current_file):
            _record_file(file_metadata_dict,
                         FileMetadata(cprt_holder, spdx_key),
                         file_ext, file_name)

    return file_metadata_dict