Ejemplo n.º 1
0
def _write_wikidata_file_items(items):
    """Append one CSV row per item to the wikidata output file.

    The destination path is taken from ``config['data']['wikidata_output']``;
    ``config`` is not a parameter and is resolved from the enclosing scope.
    The file is opened in append mode, so existing content is kept.

    :param items: iterable of mappings; each must be indexable by every
        name in ``FIELDS``. Values are converted with ``unicode()`` and
        written in ``FIELDS`` order.
    """
    output_path = config['data']['wikidata_output']
    with open(output_path, 'a') as handle:
        csv_out = UnicodeWriter(handle)
        for entry in items:
            csv_out.writerow([unicode(entry[field]) for field in FIELDS])
        # Push buffered rows to disk before the handle is closed.
        handle.flush()
Ejemplo n.º 2
0
def _write_wikidata_file_header(config):
    """Write the CSV header row to the wikidata output file if it is needed.

    The header (the names in ``FIELDS``, converted with ``unicode()``) is
    written only when the file at ``config['data']['wikidata_output']`` does
    not exist as a regular file or has a size of zero bytes. Otherwise the
    file is left untouched.

    :param config: nested mapping providing ``config['data']['wikidata_output']``.
    """
    target = config['data']['wikidata_output']
    needs_header = (not os.path.isfile(target)
                    or os.stat(target).st_size == 0)
    if not needs_header:
        return

    with open(target, 'w') as handle:
        csv_out = UnicodeWriter(handle)
        csv_out.writerow([unicode(name) for name in FIELDS])
        handle.flush()
Ejemplo n.º 3
0
def get_wikipedia_pages(config):
    """Fetch Wikipedia pages and dump them to the wikipedia output CSV.

    Pages are requested from ``get_wikipedia_pages_from_database()`` first;
    if that yields a falsy result, ``get_wikipedia_pages_from_api()`` is
    tried instead. When pages are available, the file at
    ``config['data']['wikipedia_output']`` is overwritten with a header row
    (the names in ``FIELDS``) followed by one row per page. When no pages
    are found, no file is written.

    :param config: nested mapping providing ``config['data']['wikipedia_output']``.
    """
    logger.debug('Try DB')
    pages = get_wikipedia_pages_from_database()

    if not pages:
        logger.debug('Try API')
        pages = get_wikipedia_pages_from_api()

    if not pages:
        # Neither source produced anything: leave the output file alone.
        return

    with open(config['data']['wikipedia_output'], 'w') as handle:
        csv_out = UnicodeWriter(handle)
        header = [unicode(name) for name in FIELDS]
        csv_out.writerow(header)
        for page in pages:
            # Pages are indexed with the unicode-converted field names.
            csv_out.writerow([unicode(page[column]) for column in header])
Ejemplo n.º 4
0
    def on_csv(self, event):
        f = dialogo_grabar_fic(
            self, "csv",
            "Archivos csv (*.csv)|*.csv|Todos los archivos (*.*)|*", "")
        if f:
            maximo = self.list.GetItemCount()
            dlg = wx.ProgressDialog(
                "Exportando datos",
                "Exportar",
                maximum=maximo,
                parent=self,
                style=wx.PD_APP_MODAL | wx.PD_CAN_ABORT | wx.PD_ELAPSED_TIME
                | wx.PD_ESTIMATED_TIME | wx.PD_REMAINING_TIME
                | wx.PD_AUTO_HIDE)
            dlg.SetIcon(imagenes.importar.GetIcon())
            try:
                column_id = self.column_id
                nombres = list()
                for i in range(0, self.list.GetColumnCount()):
                    nombres.append(self.list.GetColumn(i).GetText())

                delimitador = str(self.parent.data[FICHERO_CSV]["delimitador"])
                with open(f, 'wb') as f1:
                    w = UnicodeWriter(f1,
                                      encoding="utf-8",
                                      delimiter=delimitador,
                                      quotechar='"')
                    w.writerow(nombres)
                    for i in range(0, self.list.GetItemCount()):
                        (sigue, salta) = dlg.Update(i, u"línea:%d" % i)
                        if not sigue:
                            break
                        nuevo = list()
                        for j in range(0, self.list.GetColumnCount()):
                            nuevo.append(self.list.GetItem(i, j).GetText())

                        if nuevo[column_id] != VALOR_ID_VACIA:
                            w.writerow(nuevo)

                        if i % 100 == 0: wx.Yield()
            except Exception, e:
                wx.MessageBox(e.message, APLICACION, wx.ICON_ERROR)

            finally:
Ejemplo n.º 5
0
    # Load the configuration; its 'data' section supplies the output file
    # paths used below.
    config = read_config()
    # print
    # print _test_with_wikidataquery('P396')
    # print
    # print _test_with_wikidataquery('P244')
    # print
    # for page in pages_with_template('Template:Giove', 'it'):
    #     print page

    # When args.drop is set, call drop() on each of the three output paths
    # and exit with status 0 without running the rest of the pipeline.
    # NOTE(review): drop() presumably removes the given file -- confirm.
    if args.drop:
        drop(config['data']['wikipedia_output'])
        drop(config['data']['wikidata_output'])
        drop(config['data']['update_output'])
        sys.exit(0)

    # Setting args.properties also forces args.data on before get_items()
    # is called.
    if args.properties:
        args.data = True

    # NOTE(review): get_items() is defined elsewhere; presumably it collects
    # the Wikipedia/Wikidata data selected by these three flags -- confirm.
    get_items(args.pedia, args.data, args.properties)

    items_to_update = get_all_items_to_update(config)

    # Overwrite the update output file: a header row built from OUT_FIELDS,
    # then one row per item, each value converted with unicode().
    with open(config['data']['update_output'], 'w') as out:
        writer = UnicodeWriter(out)
        keys = [unicode(k) for k in OUT_FIELDS]
        writer.writerow(keys)
        for item in items_to_update:
            # Items are indexed with the unicode-converted field names.
            row = [unicode(item[k]) for k in keys]
            writer.writerow(row)