"Inventor-Country",
        "Applicant-Country",
        "equivalents",
        "CPC",
        u'references',
        u'CitedBy',
        'prior',
        'family lenght',
        'CitO',
        'CitP'
    ]

    print "\n> Hi! This is DataTable Families formater", ndf
    if 'Description' + ndf in os.listdir(ListBiblioPath):
        with open(ListBiblioPath + '//' + ndf, 'r') as data:
            dico = LoadBiblioFile(ListBiblioPath, ndf)
    else:  #Retrocompatibility
        print "please use Comptatibilizer"
        sys.exit()
    LstBrevet = dico['brevets']
    if dico.has_key('requete'):
        requete = dico["requete"]
        print "Using ", ndf, " file. Found ", len(
            dico["brevets"]), " patents! Formating to HMTL tables"

    LstExp = []
    LstExp2 = []
    #just for testing last fnction in gathered should deseapear soon

    for brev in LstBrevet:
        #brev = CleanPatent(brev)
Ejemplo n.º 2
0
# NOTE(review): scraped Python-2 fragment — truncated by the example
# separator above; the trailing `return dico` sits outside any visible
# `def`, so this block is not runnable as copied.
if IsEnableScript:
    # the list of keys for filtering for datatable
    clesRef = ['label', 'title', 'year','priority-active-indicator',
    'IPCR11', 'kind', 'applicant', 'country', 'inventor', 'representative', 'IPCR4',
    'IPCR7', "Inventor-Country", "Applicant-Country", "equivalents", "CPC", u'references', u'Citations', u'CitedBy']

    # one pass for the plain corpus, plus one for families when enabled
    prefixes = [""]
    if GatherFamilly:
        prefixes.append("Families")

    for prefix in prefixes:
        # storage name, e.g. "<ndf>" vs. "Families<ndf>"
        ndf = prefix + configFile.ndf

        if 'Description'+ndf in os.listdir(ListBiblioPath): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
            LstBrevet = LoadBiblioFile(ListBiblioPath, ndf)
            with open(ListBiblioPath +'//Description'+ndf, 'r') as ficRes:
                DataBrevet = cPickle.load(ficRes)
        else: #Retrocompatibility
            # legacy layout: whole structure pickled into a single file
            with open(ListBiblioPath+'//'+ndf, 'r') as data:
                LstBrevet = cPickle.load(data)

        ##next may need clarifying update

        # NOTE(review): `LstBrevet` is immediately re-bound from itself via
        # `data` — presumably both load paths yield a dict with a 'brevets'
        # key; confirm against LoadBiblioFile.
        data = LstBrevet
        LstBrevet = data['brevets']
        if data.has_key('requete'):
            requete = data["requete"]
        if data.has_key('number'):
            print "Found ", data["number"], " patents! Formating to HMTL tables"
    # NOTE(review): `return` outside a function — scraping artifact; the
    # original snippet was evidently the tail of a function body.
    return dico

if IsEnableScript:
    GatherContent = True
    #not fun
    registered_client = epo_ops.Client(key, secret)
    #        data = registered_client.family('publication', , 'biblio')
    registered_client.accept_type = 'application/json'

    for ndf in [fic2 for fic2 in os.listdir(ResultBiblioPath) if fic2.count('Description')==0]:
        if ndf.startswith('Families'):
            typeSrc = 'Families'
        else:
            typeSrc = ''
        if 'Description'+ndf or 'Description'+ndf.lower() in os.listdir(ResultListPath): # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
            ficBrevet = LoadBiblioFile(ResultListPath, ndf)

        else: #Retrocompatibility
            print 'gather your data again. sorry'
            sys.exit()

        if ficBrevet.has_key('brevets'):
            lstBrevet = ficBrevet['brevets']
    #        if data.has_key('requete'):
    #            DataBrevet['requete'] = data["requete"]
            print "Found ",typeSrc, ' file and', len(lstBrevet), " patents! Gathering contents"
        else:
            print 'gather your data again'
            sys.exit()

        registered_client = epo_ops.Client(key, secret)
Ejemplo n.º 4
0
        return dico
    else:
        return dico


if GatherFamilly:
    print "\n> Hi! This is the family gatherer. Processing ", ndf
    try:

        fic = open(ResultPath + '//' + ndf, 'r')

        print "loading data file ", ndf + ' from ', ResultPath, " directory."
        if 'Description' + ndf or "Description" + ndf.title() in os.listdir(
                ResultPath
        ):  # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
            data = LoadBiblioFile(ResultPath, ndf)

        else:  #Retrocompatibility :-)
            print "gather your data again"
            sys.exit()
        if isinstance(data, collections.Mapping):
            ListeBrevet = data['brevets']
            if data.has_key('number'):
                print "Found ", data["number"], " patents!  and ", len(
                    ListeBrevet), " gathered."
        else:
            print 'data corrupted. Do something (destroying data directory is a nice idea)'
            sys.exit()
        print len(ListeBrevet), " patents loaded from file."
        print "Augmenting list with families."
        ficOk = True
Ejemplo n.º 5
0
        # (Scraped fragment — starts mid-function; `NeededInfo`, `mixNet`,
        # `network`, `ndf` and `BiblioPath` are defined upstream.)
        NeededInfo.extend(mixNet)  # list of needed field for building the net
        # may be should use  from
        # from collections import OrderedDict
        # class OrderedNodeGraph(nx.Graph):
        #   node_dict_factory=OrderedDict
        # G = OrderedNodeGraph()
        G1 = nx.DiGraph()  # dynamic network for Gephi
        attr_dict = dict()  # attributes for the net
        # flat net for gexf.js may be it is possible to use previous instead of this one...

        if 'Description' + ndf in os.listdir(
                BiblioPath
        ):  # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
            print network, ": loading data with ", " and ".join(
                mixNet), " fields."
            DataBrevet = LoadBiblioFile(BiblioPath, ndf)
            print "Hi this is Pre-Network processor. Bibliographic data of ", ndf, " patent universe found."
        else:  #Retrocompatibility
            # NOTE(review): this fallback only prints — `DataBrevet` stays
            # unbound here, so the access just below would raise NameError.
            print "please use Comptatibilizer"

        print "Nice, ", len(
            DataBrevet["brevets"]
        ), " patents found. Pre-formating ", sys.argv[1], " net."
        for brev in DataBrevet["brevets"]:
            #tempo = pickle.load(fic) # we only memorize needed nfo
            pat = OrderedDict()
            # patents without a date get a sentinel so later date handling works
            if "date" not in brev.keys():
                brev['date'] = '1-1-1'
            # some sources give the label as a one-element list — unwrap it
            if isinstance(brev['label'], list):
                brev['label'] = brev['label'][0]
            for key in NeededInfo:
Ejemplo n.º 6
0
            if len(lstBrevets) == nbTrouves and nbActus == nbTrouves:
                ficOk = True
                print nbTrouves, " patents gathered yet. No more patents to retreive. Steping to bibliographic data."
            else:
                ficOk = False
                print nbTrouves, " patents corresponding to the request."

                print len(
                    lstBrevets
                ), ' in file corresponding to the request. Retreiving associated bibliographic data'
        else:
            print "You prefer not to gather data. I hope you know what you do. At your own risk. P2N may crash"
except:
    try:

        lstBrevets = LoadBiblioFile(ResultBiblioPath, ndf)
        nbActus = len(lstBrevets)
        ficOk = True

    except:
        lstBrevets = [
        ]  # gathering all again, I don t know if of serves the same ordered list of patents
        ficOknd = False
        nbTrouves = 1
STOP = False
#else:
#
#    print "Good, nothing to do"
if not ficOk and GatherPatent:
    while len(lstBrevets) < nbTrouves and not STOP:
        if len(lstBrevets) + 25 < 2000:
Ejemplo n.º 7
0
#should set a working dir one upon a time... done it is temporPath
ResultBiblioPath = configFile.ResultBiblioPath
ResultPatentPath = configFile.ResultListPath
ResultContentsPath = configFile.ResultContentsPath

GlobalPath = configFile.GlobalPath

# take request from BiblioPatent file

# Load the bibliographic pickle for this corpus; the `.title()` variant
# guards against files saved with a capitalised name (presumably a
# platform/casing quirk — confirm against the gatherer's naming).
if 'Description' + ndf in os.listdir(
        ResultBiblioPath
) or 'Description' + ndf.title() in os.listdir(
        ResultBiblioPath
):  # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
    data = LoadBiblioFile(ResultBiblioPath, ndf)
    requete = data['requete']
else:  #Retrocompatibility
    print "please use Comptatibilizer"
    #if 'Fusion' in data.keys()
    # fall back to an empty dict so later lookups do not NameError
    data = dict()
if GatherFamilly:  #pdate needed for families
    # Family-expanded corpus: only the number of families is needed here.
    if 'DescriptionFamilies' + ndf in os.listdir(
            ResultBiblioPath
    ) or 'DescriptionFamilies' + ndf.title() in os.listdir(
            ResultBiblioPath
    ):  # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
        data2 = LoadBiblioFile(ResultBiblioPath, 'Families' + ndf)
        nbFam = len(data2['brevets'])
    else:  #Retrocompatibility
        print "please use Comptatibilizer"
Ejemplo n.º 8
0
def run():
    """Build an HTML gallery of patent drawings for each configured corpus.

    Loads the bibliographic pickle, walks the numbered TIFF files already
    present for every patent, generates thumbnails for each drawing and
    renders the 'ModeleImages.html' template into the image result path.
    Does nothing when image gathering is disabled in the configuration.
    """
    boot_logging()

    cfg = LoadConfig()

    # Honour the GatherImages switch: bail out early when disabled.
    if not cfg.GatherImages:
        return

    # Pull the handful of settings this job needs out of the configuration.
    expression = cfg.requete
    base_dir = cfg.ResultBiblioPath
    dirname = cfg.ndf
    out_dir = cfg.ResultPathImages

    # One pass for the plain corpus, plus one for families when enabled.
    variants = [""]
    if cfg.GatherFamilly:
        variants.append("Families")

    for variant in variants:

        logger.info("Generating gallery of drawings for {}. ".format(
            label_from_prefix(variant)))

        # Storage slot combines prefix and DataDirectory,
        # e.g. "Lentille" vs. "FamiliesLentille".
        bib = LoadBiblioFile(base_dir, variant + dirname)

        entries = []
        for pat in bib['brevets']:
            code = get_patent_label(pat)
            logger.info('Processing patent {}'.format(code))

            # Drawings are numbered files: <out_dir>//<code>-<n>.tiff
            tmpl = '{}//{}-{}.tiff'.format(out_dir, code, '{}')
            idx = 1
            while True:
                img_path = tmpl.format(idx)
                if not os.path.exists(img_path):
                    break
                thumb, orig, tiff = generate_thumbnails(img_path)
                entries.append({
                    "_id": '{}-{}'.format(code, idx),
                    'thumb': thumb,
                    'orig': orig,
                    'label': pat['title'],
                    'ipcr7': pat['IPCR7'],
                    'code': code,
                    'tiff': tiff,
                })
                idx += 1

        # Render the gallery page next to the images.
        RenderTemplate(
            'ModeleImages.html',
            out_dir + '/index' + variant + '.html',
            request=expression.replace('"', ''),
            gallery=entries,
            json=json.dumps(entries),
        )
Ejemplo n.º 9
0
        # (Scraped fragment — these first lines close a field-key list
        # begun before this snippet.)
        u'Citations',  # the number of citations granted by the document
        #u'CitedBy',     # the list of docs (patents) cititng this patent
        #'CitP',         # the patents cited by this patent
        #'CitO'          # the other docs cited by this patent
    ]  #"citations"

    #filterFile = [fi for fi in os.listdir(ListBiblioPath) if fi.count('Expanded')]
    # corpus names: strip the 'Description' prefix to recover base filenames
    srcFile = [
        fi.replace('Description', '') for fi in os.listdir(ListBiblioPath)
    ]

    for ndf in set(srcFile):
        if 'Description' + ndf in os.listdir(
                ListBiblioPath
        ):  # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
            DataBrevet = LoadBiblioFile(ListBiblioPath, ndf)
            print "\n> Hi! This is FormateExportPivotTable"
        else:  #Retrocompatibility... previous test is ugly: there is an issue with filename in lowercase (sometimes)
            print "please use Comptatibilizer"
            DataBrevet = LoadBiblioFile(ListBiblioPath,
                                        ndf)  #so I try to load it....

        if isinstance(DataBrevet, collections.Mapping):
            #data = DataBrevet
            LstBrevet = DataBrevet['brevets']
            if DataBrevet.has_key('number'):
                print "Found ", DataBrevet[
                    "number"], " patents! Formating into HMTL Pivot tables"
            else:
                print "Found ", len(
                    DataBrevet["brevets"]
Ejemplo n.º 10
0
# Carrot2 exporter fragment: loads each corpus and prepares per-language,
# per-section text exports. (Scraped snippet — truncated mid-call.)
if IsEnableScript:
    # Rep: extracted contents directory; Bib: bibliographic pickles.
    Rep = configFile.ResultContentsPath
    Bib = configFile.ResultBiblioPath

    prefixes = [""]
    if GatherFamilly:
        prefixes.append("Families")

    for prefix in prefixes:
        # storage name, e.g. "<ndf>" vs. "Families<ndf>"
        ndf = prefix + configFile.ndf

        if 'Description' + ndf in os.listdir(
                Bib
        ):  # NEW 12/12/15 new gatherer append data to pickle file in order to consume less memory
            DataBrevet = LoadBiblioFile(Bib, ndf)
            LstBrevet = DataBrevet['brevets']
        else:  #Retrocompatibility
            # NOTE(review): `LstBrevet` stays unbound on this path — any
            # later use would raise NameError; confirm intended behaviour.
            print "please use Comptatibilizer"

        try:
            os.makedirs(Rep + "//Carrot2")
        except:
            #directory exists
            pass
        temporar = GenereListeFichiers(Rep)
        for det in ['Abstract', 'Claims', 'Description']:
            ind = 0
            for lang in ['FR', 'EN', 'UNK']:
                NomResult = lang + '_' + det.replace(
                    'Abstracts', ''
Ejemplo n.º 11
0
# Image-metadata gatherer fragment: queries EPO OPS for drawing metadata
# of every patent in each corpus. (Scraped snippet — truncated mid-loop.)
ResultPathImages = configFile.ResultPathImages
P2NFamilly = configFile.GatherFamilly

if IsEnableScript:
    ops_client = epo_ops.Client(key, secret)
    ops_client.accept_type = 'application/json'

    prefixes = [""]
    if P2NFamilly:
        prefixes.append("Families")

    for prefix in prefixes:
        ndf = prefix + configFile.ndf

        try:
            biblio_file = LoadBiblioFile(ResultBiblioPath, ndf)
        except IOError as ex:
            print 'WARNING: Could not load information for "{}". Not found / error: {}'.format(ndf, ex)
            # BUGFIX: without this `continue`, execution fell through to the
            # code below with `biblio_file` unbound (first iteration) or
            # stale from the previous prefix (later iterations).
            continue

        patents = biblio_file['brevets']
        metadata = {}

        for patent in patents:
            patent_label = get_patent_label(patent)
            pathes = []
            path_json = '{}//{}.json'.format(ResultPathImages, patent_label)
            path_image = '{}//{}-{}.tiff'.format(ResultPathImages, patent_label, '{}')
            print "Processing patent {}".format(patent_label)
            js = get_images_meta(ops_client, patent_label, path_json)
            if not js:
                continue
Ejemplo n.º 12
0
# FreePlane exporter fragment: loads each corpus and normalises the
# classification field before export. (Scraped snippet — truncated mid-loop.)
ResultListPath = configFile.ResultListPath
ResultBiblioPath = configFile.ResultBiblioPath

if IsEnableScript:
    LoadDescs()

    prefixes = [""]
    if P2NFamilly:
        prefixes.append("Families")

    for prefix in prefixes:
        ndf = prefix + configFile.ndf
        try:
            with open(ResultBiblioPath+'//'+ndf, 'r') as fic:
                DataBrevets1 = LoadBiblioFile(ResultBiblioPath, ndf)
                BrevetsTotal = str(len(DataBrevets1['brevets']))
        except:
            # NOTE(review): bare except kept to preserve the original
            # best-effort behaviour; consider narrowing to IOError/KeyError.
            print "Error: there are no data to generate de FreePlane file"
            # BUGFIX: without this `continue`, the loop body below used an
            # unbound (first iteration) or stale (later iterations)
            # `DataBrevets1` after a failed load.
            continue
        # End of Load patent file
        #

        ### ugly code to patch classification extraction inconsistency
        for bre in DataBrevets1['brevets']:
            # drop empty classification entries left by the extractor
            if isinstance(bre['classification'], list):
                if '' in bre['classification']:
                    bre['classification'].remove('')
            bre['IPCR11'] =  bre['classification']

            lstIPC = [ipc[0] for ipc in bre['classification']]
            for ipc in lstIPC: