コード例 #1
0
ファイル: extract_db_snapshot.py プロジェクト: MiltosD/ELRC2
def create_snapshot():
    '''
    Write a JSON snapshot of all unique, non-deleted resources.

    A resource is included when its storage object is not marked as deleted
    and either ``_is_processed`` or ``_is_not_processed_or_related`` holds
    for it. The file is named ``unique_resources_<dd-mm-YYYY>.json`` and its
    path is built by prefixing that name with ``UNIQUE_RESOURCES_SNAPSHOTS``.

    Returns the path of the JSON file that was written.
    '''
    unique_resources = [
        r for r in resourceInfoType_model.objects.filter(
            storage_object__deleted=False)
        if (_is_processed(r) or _is_not_processed_or_related(r))
    ]

    metadata = []
    for r in unique_resources:
        output = dict()
        output["id"] = r.id
        output['resourceName'] = best_lang_value_retriever(
            r.identificationInfo.resourceName)
        output['resourceType'] = r.resource_type()
        output['mimetypes'] = _get_resource_mimetypes(r)
        output['linguality'] = _get_resource_linguality(r)
        # Multi-valued fields are de-duplicated through a set, so their
        # order in the snapshot is not significant.
        output['languages'] = list(set(_get_resource_lang_info(r)))
        output['domains'] = list(set(_get_resource_domain_info(r)))
        output['dsis'] = list(set(r.identificationInfo.appropriatenessForDSI))
        output['licences'] = list(set(get_licenses(r)))
        output['psi'] = _get_psi(r)
        output['country'] = _get_country(r)
        output['created'] = r.storage_object.created.strftime("%d-%m-%Y")
        output['downloads'] = model_utils.get_lr_stat_action_count(
            r.storage_object.identifier, DOWNLOAD_STAT)
        output['views'] = model_utils.get_lr_stat_action_count(
            r.storage_object.identifier, VIEW_STAT)
        output['projects'] = get_projects(r)
        output['related_resources'] = get_related_ids(r)
        output['validated'] = bool(r.storage_object.get_validation())
        output['status'] = status[r.storage_object.publication_status]
        metadata.append(output)

    json_output = {
        "unique_resources": {
            "count": len(unique_resources),
            "metadata": metadata,
        }
    }

    # Serialise before touching the file system so that a serialisation
    # error cannot leave a truncated or empty snapshot behind.
    payload = json.dumps(json_output, indent=4, sort_keys=True)

    out_file_path = "{}unique_resources_{}.json".format(
        UNIQUE_RESOURCES_SNAPSHOTS,
        datetime.datetime.today().strftime("%d-%m-%Y"))
    # The context manager closes the handle even if the write fails.
    with open(out_file_path, "w") as out_file:
        out_file.write(payload)

    return out_file_path
コード例 #2
0
def extended_report():
    '''
    Build the extended repository report as an in-memory Excel workbook.

    One worksheet row is written for every resource whose storage object is
    not marked as deleted. The header row is frozen.

    Returns a dict with two keys: "output", the StringIO buffer holding the
    workbook (rewound to position 0), and "title", the worksheet title
    ("ELRC-SHARE_<dd-mm-yy>-EXT"). Returns None when there are no such
    resources.
    '''
    resources = resourceInfoType_model.objects.filter(
        storage_object__deleted=False)
    if not resources:
        # Nothing to report; callers get None, exactly as before.
        return None

    title = "ELRC-SHARE_{}-EXT".format(
        datetime.datetime.now().strftime("%d-%m-%y"))
    output = StringIO.StringIO()
    workbook = xlsxwriter.Workbook(output)

    # Cell formats
    heading = workbook.add_format({
        'font_size': 11,
        'font_color': 'white',
        'bold': True,
        'bg_color': "#058DBE",
        'border': 1
    })
    bold = workbook.add_format({'bold': True})
    date_format = workbook.add_format({'num_format': 'yyyy, mmmm d'})
    worksheet = workbook.add_worksheet(name=title)

    # Header row. The position in this tuple is the zero-based column index
    # used by the row-writing code below (0 == column A, 37 == column AL).
    headers = (
        'Resource ID',               # 0  / A
        'Resource Name',             # 1  / B
        'Type',                      # 2  / C
        'Mimetypes',                 # 3  / D
        'Linguality',                # 4  / E
        'Language(s)',               # 5  / F
        'Resource Size',             # 6  / G
        'Resource Size Unit(s)',     # 7  / H
        'Domain(s)',                 # 8  / I
        'DSI Relevance',             # 9  / J
        'Legal Status',              # 10 / K
        'PSI',                       # 11 / L
        'Countries',                 # 12 / M
        'Contacts',                  # 13 / N
        'Partner',                   # 14 / O
        'Project',                   # 15 / P
        'Processed',                 # 16 / Q
        'Related To',                # 17 / R
        'Validated',                 # 18 / S
        'To be delivered to EC',     # 19 / T
        'Delivered to EC',           # 20 / U
        'Status',                    # 21 / V
        'Date',                      # 22 / W
        'Views',                     # 23 / X
        'Downloads',                 # 24 / Y
        'Delivered to ODP',          # 25 / Z
        'Personal Data',             # 26 / AA
        'Sensitive Data',            # 27 / AB
        'Other Licence Name',        # 28 / AC
        'Other Licence Terms Text',  # 29 / AD
        'Other Licence Terms URL',   # 30 / AE
        'Conditions of Use',         # 31 / AF
        'IPR Holder',                # 32 / AG
        'Legal Documentation',       # 33 / AH
        'Allows Uses Besides DGT',   # 34 / AI
        'IPR Clearing Status',       # 35 / AJ
        'IPR Comments',              # 36 / AK
        'Unique',                    # 37 / AL
    )
    for col, header in enumerate(headers):
        worksheet.write(0, col, header, heading)
    worksheet.write_comment('F1', 'Delimited by "|" as per language')
    worksheet.write_comment('G1', 'Delimited by "|" as per size')

    # Row 0 holds the headers, so data rows start at 1.
    for row, res in enumerate(resources, start=1):
        # Fetch the distributions once per resource instead of re-running
        # the same query for every column derived from them.
        distributions = list(res.distributioninfotype_model_set.all())

        # Collect the licence records once; several columns read from them.
        licence_infos = []
        licences = []
        try:
            for dist in distributions:
                for licence_info in dist.licenceInfo.all():
                    licence_infos.append(licence_info)
                    licences.append(licence_info.licence)
        except Exception:
            # Best effort, as before: if licence data cannot be read, keep
            # what was collected so far and flag the resource.
            licences.append("underReview")

        contacts = []
        for cp in res.contactPerson.all():
            emails = ", ".join(cp.communicationInfo.email)
            # Prefer "surname givenName"; fall back to the surname alone
            # when no given name is stored.
            try:
                contacts.append(u"{} {} ({})".format(
                    cp.surname.values()[0],
                    cp.givenName.values()[0],
                    emails))
            except IndexError:
                contacts.append(u"{} ({})".format(
                    cp.surname.values()[0], emails))

        # Resource name: English if available, otherwise the first stored
        # language.
        names = res.identificationInfo.resourceName
        try:
            res_name = smart_str(names['en'])
        except KeyError:
            res_name = smart_str(names[names.keys()[0]])

        # Creation date truncated to the day (time part dropped).
        created = datetime.datetime.strptime(
            unicode(res.storage_object.created).split(" ")[0], "%Y-%m-%d")

        # A: id, B: name, C: type
        worksheet.write(row, 0, res.id)
        worksheet.write(row, 1, res_name.decode('utf-8'), bold)
        worksheet.write(row, 2, res.resource_type())

        # D: mimetypes
        mimetypes = _get_resource_mimetypes(res)
        worksheet.write(
            row, 3, " | ".join(mimetypes) if mimetypes else "N/A")

        # E: linguality, F: languages
        worksheet.write(row, 4, ", ".join(_get_resource_linguality(res)))
        worksheet.write(row, 5, " | ".join(_get_resource_lang_info(res)))

        # G, H: preferred size and its unit
        preferred_size = _get_preferred_size(res)
        if preferred_size:
            size_num = float(preferred_size.size)
            if size_num.is_integer():
                # Convert from the float, not the raw value, so integral
                # strings such as "12.0" do not make int() fail.
                size_num = int(size_num)
            worksheet.write_number(row, 6, size_num)
            worksheet.write(
                row, 7, prettify_camel_case_string(preferred_size.sizeUnit))
        else:
            worksheet.write(row, 6, "")
            worksheet.write(row, 7, "")

        # I: domains
        domain_info = _get_resource_domain_info(res)
        worksheet.write(
            row, 8, " | ".join(domain_info) if domain_info else "N/A")

        # J: DSI relevance
        dsi_values = res.identificationInfo.appropriatenessForDSI
        worksheet.write(
            row, 9, ", ".join(dsi_values) if dsi_values else "N/A")

        # K: legal status
        worksheet.write(row, 10, ", ".join(licences))

        # L: PSI
        worksheet.write(
            row, 11, "YES" if any(d.PSI for d in distributions) else "NO")

        # M: countries
        worksheet.write(row, 12, _get_country(res) or "N/A")

        # N: contacts
        worksheet.write(
            row, 13, " | ".join(contacts) if contacts else "N/A")

        # O: partner
        management = res.management_object
        worksheet.write(row, 14, management.partner_responsible)

        # P: funding projects. If any project lacks an English short name,
        # the English full names are used for all projects instead. A
        # missing resourceCreationInfo yields an empty cell.
        try:
            rc = res.resourceCreationInfo
            try:
                funding_projects = [
                    fp.projectShortName['en']
                    for fp in rc.fundingProject.all()
                ]
            except KeyError:
                funding_projects = [
                    fp.projectName['en'] for fp in rc.fundingProject.all()
                ]
        except AttributeError:
            funding_projects = []
        worksheet.write(row, 15, ", ".join(funding_projects))

        # Q: processed
        worksheet.write(
            row, 16, "YES" if management.is_processed_version else "NO")

        # R: related resources (de-duplicated; empty string when none)
        related_ids = ", ".join(set([
            rel.relatedResource.targetResourceNameURI
            for rel in res.relationinfotype_model_set.all()
        ]))
        worksheet.write(row, 17, related_ids)

        # S: validated
        worksheet.write(
            row, 18, "YES" if res.storage_object.get_validation() else "NO")

        # T, U: EC delivery fields (blank when unset)
        worksheet.write(row, 19, management.to_be_delivered_to_EC or "")
        worksheet.write(row, 20, management.delivered_to_EC or "")

        # V: status, W: date
        worksheet.write(
            row, 21, status[res.storage_object.publication_status])
        worksheet.write_datetime(row, 22, created, date_format)

        # X: views, Y: downloads
        worksheet.write(row, 23, model_utils.get_lr_stat_action_count(
            res.storage_object.identifier, VIEW_STAT))
        worksheet.write(row, 24, model_utils.get_lr_stat_action_count(
            res.storage_object.identifier, DOWNLOAD_STAT))

        # Z: delivered to ODP
        try:
            odp = "YES" if management.delivered_odp else "NO"
        except ObjectDoesNotExist:
            odp = "NO"
        worksheet.write(row, 25, odp)

        # AA, AB: personal / sensitive data flags
        personal_data = "YES" if True in [
            d.personalDataIncluded for d in distributions
        ] else "NO"
        worksheet.write(row, 26, personal_data)

        sensitive_data = "YES" if True in [
            d.sensitiveDataIncluded for d in distributions
        ] else "NO"
        worksheet.write(row, 27, sensitive_data)

        # AC: other licence names. Unset (None) names are skipped rather
        # than letting join() fail and silently leaving the cell blank.
        other_licences = [
            li.otherLicenceName for li in licence_infos
            if li.otherLicenceName is not None
        ]
        worksheet.write(row, 28, ", ".join(other_licences))

        # AD: other licence terms text
        worksheet.write(row, 29, ", ".join([
            best_lang_value_retriever(li.otherLicence_TermsText)
            for li in licence_infos
        ]))

        # AE: other licence terms URL
        worksheet.write(row, 30, ", ".join([
            li.otherLicence_TermsURL for li in licence_infos
        ]))

        # AF: conditions of use
        restrictions = []
        for li in licence_infos:
            restrictions.extend(li.restrictionsOfUse)
        worksheet.write(row, 31, ", ".join(restrictions))

        # AG: IPR holders; organizations show their name, every other
        # holder is rendered as "givenName surname".
        ipr_holders = []
        for dist in distributions:
            for ip in dist.iprHolder.all():
                subclass = ip.as_subclass()
                holder_emails = u", ".join(subclass.communicationInfo.email)
                if isinstance(subclass, organizationInfoType_model):
                    ipr_holders.append(u"{} ({})".format(
                        best_lang_value_retriever(
                            subclass.organizationName).encode(
                                'utf-8').decode('utf-8'),
                        holder_emails))
                else:
                    ipr_holders.append(u"{} {} ({})".format(
                        best_lang_value_retriever(
                            subclass.givenName).encode('utf-8').decode(
                                'utf-8'),
                        best_lang_value_retriever(subclass.surname).encode(
                            'utf-8').decode('utf-8'),
                        holder_emails))
        worksheet.write(row, 32, u", ".join(ipr_holders))

        # AH: legal documentation
        worksheet.write(
            row, 33,
            "YES" if res.storage_object.get_legal_documentation() else "NO")

        # AI: allows uses besides DGT
        dgt = "YES" if True in [
            d.allowsUsesBesidesDGT for d in distributions
        ] else "NO"
        worksheet.write(row, 34, dgt)

        # AJ: IPR clearing status, AK: IPR comments
        ipr_status = ""
        if management.ipr_clearing:
            ipr_status = prettify_camel_case_string(management.ipr_clearing)
        worksheet.write(row, 35, ipr_status)
        worksheet.write(row, 36, management.comments)

        # AL: unique
        is_unique = "YES" if (_is_processed(res) or
                              _is_not_processed_or_related(res)) else "NO"
        worksheet.write(row, 37, is_unique)

    worksheet.freeze_panes(1, 0)
    workbook.close()

    output.seek(0)
    return {"output": output, "title": title}
コード例 #3
0
def _cefdigital_report():
    '''
    Build the CEF-Digital Excel report and e-mail it to ILSP_ADMINS.

    The report has one row per unique, non-deleted resource, i.e. one for
    which ``_is_processed`` or ``_is_not_processed_or_related`` holds. The
    workbook is attached to the message as "<title>.xlsx".

    Returns an HttpResponse naming the recipients. The response is returned
    even if sending failed; the SMTP error is only printed. Returns None
    when there are no unique resources (no e-mail is sent in that case).
    '''
    resources = resourceInfoType_model.objects.filter(
        storage_object__deleted=False)
    unique_resources = [
        r for r in resources
        if _is_processed(r) or _is_not_processed_or_related(r)
    ]
    if not unique_resources:
        # Nothing to report; callers get None, exactly as before.
        return None

    output = StringIO.StringIO()
    workbook = xlsxwriter.Workbook(output)

    # Cell formats
    heading = workbook.add_format(
        {'font_size': 11, 'font_color': 'white', 'bold': True,
         'bg_color': "#058DBE", 'border': 1})
    bold = workbook.add_format({'bold': True})
    title = "ELRC-SHARE_CEF-DIGITAL_{}".format(
        datetime.datetime.now().strftime("%d-%m-%y"))
    worksheet = workbook.add_worksheet(name=title)

    # Header row. The position in this tuple is the zero-based column index
    # used by the row-writing code below (0 == column A, 10 == column K).
    headers = (
        'Resource ID',            # 0  / A
        'Resource Name',          # 1  / B
        'Type',                   # 2  / C
        'Linguality',             # 3  / D
        'Language(s)',            # 4  / E
        'Resource Size',          # 5  / F
        'Resource Size Unit(s)',  # 6  / G
        'Domain(s)',              # 7  / H
        'DSI Relevance',          # 8  / I
        'Legal Status',           # 9  / J
        'Countries',              # 10 / K
    )
    for col, header in enumerate(headers):
        worksheet.write(0, col, header, heading)
    worksheet.write_comment('E1', 'Delimited by "|" as per language')
    worksheet.write_comment('F1', 'Delimited by "|" as per size')

    # Row 0 holds the headers, so data rows start at 1.
    for row, res in enumerate(unique_resources, start=1):
        licences = []
        try:
            for dist in res.distributioninfotype_model_set.all():
                for licence_info in dist.licenceInfo.all():
                    licences.append(licence_info.licence)
        except Exception:
            # Best effort, as before: if licence data cannot be read, keep
            # what was collected so far and flag the resource.
            licences.append("underReview")

        # Resource name: English if available, otherwise the first stored
        # language.
        names = res.identificationInfo.resourceName
        try:
            res_name = smart_str(names['en'])
        except KeyError:
            res_name = smart_str(names[names.keys()[0]])

        # A: id, B: name, C: type
        worksheet.write(row, 0, res.id)
        worksheet.write(row, 1, res_name.decode('utf-8'), bold)
        worksheet.write(row, 2, res.resource_type())

        # D: linguality, E: languages
        worksheet.write(row, 3, ", ".join(_get_resource_linguality(res)))
        worksheet.write(row, 4, " | ".join(_get_resource_lang_info(res)))

        # F, G: preferred size and its unit
        preferred_size = _get_preferred_size(res)
        if preferred_size:
            size_num = float(preferred_size.size)
            if size_num.is_integer():
                # Convert from the float, not the raw value, so integral
                # strings such as "12.0" do not make int() fail.
                size_num = int(size_num)
            worksheet.write_number(row, 5, size_num)
            worksheet.write(
                row, 6, prettify_camel_case_string(preferred_size.sizeUnit))
        else:
            worksheet.write(row, 5, "")
            worksheet.write(row, 6, "")

        # H: domains
        domain_info = _get_resource_domain_info(res)
        worksheet.write(
            row, 7, " | ".join(domain_info) if domain_info else "N/A")

        # I: DSI relevance
        dsi_values = res.identificationInfo.appropriatenessForDSI
        worksheet.write(
            row, 8, ", ".join(dsi_values) if dsi_values else "N/A")

        # J: legal status
        worksheet.write(row, 9, ", ".join(licences))

        # K: countries
        worksheet.write(row, 10, _get_country(res) or "N/A")

    worksheet.freeze_panes(1, 0)
    workbook.close()

    # Send email
    msg_body = "Check generated CEF-DIGITAL report"
    msg = EmailMessage("[ELRC] ERLC-SHARE CEF-Digital report (DRAFT)", msg_body,
                       from_email='*****@*****.**', to=ILSP_ADMINS)
    msg.attach("{}.xlsx".format(title), output.getvalue(),
               "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
    try:
        msg.send()
    except SMTPException as e:
        # A single pre-formatted argument prints the same text whether
        # print is the Python 2 statement or the print() function.
        print('There was an error sending an email: {}'.format(e))
    return HttpResponse("{}: CEF Digital repository report sent to: {}\n"
                        .format(datetime.datetime.now().strftime("%a, %d %b %Y"), ", ".join(ILSP_ADMINS)))