Example no. 1
    def get_data(self, f, **kwargs):
        location = kwargs.get('meetlocatie',None)
        if location and isinstance(location, MeetLocatie):
            location = location.name

        params = kwargs.get('parameter',self.parm)
        if params and isinstance(params, Parameter):
            params = [params.name]
            
        # find data for location and one of the parameters (always one parameter per json file)
        dfs = {}
        for ts in ijson.items(f,'results.item'):
            pname = ts['name']
            if pname not in params:
                # only 1 parameter per file
                break
            lcode = ts['location']['organisation_code']
            if not location or lcode == location: 
                events = ts['events']
                data = []
                if events:
                    for e in events:
                        tmin = e['min']
                        tmax = e['max']
                        tgem = (tmin+tmax)/2 if (tmin and tmax) else None
                        t=e['timestamp']
                        data.append((datetime.datetime.fromtimestamp(t/1000),tgem))
                if data:
                    df = pd.DataFrame.from_records(data, index=['datum'], columns=['datum',pname])
                    if location:
                        # requested for a single location
                        return df
                    dfs[lcode] = df
        return dfs
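A minimal, self-contained sketch of the 'results.item' prefix used above; the payload shape (results[].name, location.organisation_code, events[].min/max/timestamp) is an assumption inferred from this reader:

import io
import ijson

# Hypothetical payload shaped like the one Example no. 1 expects.
payload = io.BytesIO(b'{"results": [{"name": "P1",'
                     b' "location": {"organisation_code": "LOC1"},'
                     b' "events": [{"min": 1.0, "max": 3.0, "timestamp": 1500000000000}]}]}')

for ts in ijson.items(payload, 'results.item'):
    print(ts['name'], ts['location']['organisation_code'], ts['events'])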
Example no. 2
    def iter_locations(self, fil):
        '''iterates over point locations and yields (id, coords, description) tuples'''
        for feature in ijson.items(fil, 'results.item.location'):
            geom = feature['geometry']
            if geom and geom['type'] == 'Point':
                x, y, z = geom['coordinates']
                coords = [float(x), float(y)]
                mcode = feature['organisation_code']
                moms = feature['name']
                yield (mcode, coords, moms)
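Dotted prefixes descend through nested objects, so 'results.item.location' yields each result's "location" sub-object directly. A small sketch with a hypothetical payload:

import io
import ijson

buf = io.BytesIO(b'{"results": [{"location":'
                 b' {"geometry": {"type": "Point", "coordinates": [5.1, 52.0, 0.0]},'
                 b' "organisation_code": "B07", "name": "Well B07"}}]}')

# Yields the "location" dict of every element of the "results" array.
for loc in ijson.items(buf, 'results.item.location'):
    print(loc['organisation_code'], loc['geometry']['coordinates'])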
Example no. 3
    def parse_json(self):
        """
		yields list of values, where each value corresponds to each dictionary key in keys,
		yields over all JSON objects in .json file
		"""
        with open(self.filepath, 'rb') as f:
            for json_obj in ijson.items(f, 'item'):
                # since the file is a list of json objects, each json_obj currently is a dictionary
                yield [
                    self.get_values(json_obj, keys) for keys in self.all_keys
                ]
Example no. 4
    def _get_structure_info(self, view):
        """
        """
        #get structure info command
        text = view.substr(Region(0, view.size()))
        cmd = self.get_structure_info_cmd(view, text)
        timeout = self.get_settings(view, "sourcekitten_timeout", 1.0)

        # run structure info command
        p = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
        structure_info = list(ijson.items(p.stdout, ''))[0]

        return structure_info
Example no. 6
def get_entries(fp, iterative=True):
    if fp is sys.stdin:
        iterative = True
        fp = fp.buffer

    if ijson is None or not iterative:
        data = fp.read()
        if isinstance(data, bytes):
            data = data.decode('utf-8')
        data = json.loads(data)
        return data['log']['entries']
    else:
        return ijson.items(fp, 'log.entries.item')
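A usage sketch for the reader above, assuming a HAR archive at the hypothetical path example.har (the 'request'/'url' keys come from the standard HAR layout):

with open('example.har', 'rb') as fp:
    for entry in get_entries(fp):
        print(entry['request']['url'])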
Example no. 7
    def init_from_cosmogony(cls, cosmogony_path):
        zones_index = cls()

        print('Reading zones...')
        with open(cosmogony_path, 'rb') as f:
            zones = ijson.items(f, 'zones.item')
            for z in zones:
                z.pop('geometry', None)
                zones_index.insert(z)
        print('{} zones have been read'.format(len(zones_index)))

        zones_index.build_children()
        return zones_index
Example no. 8
def _iter_locations_fo(fo, start, stop) -> Iterator[Location]:
    logger = get_logger()

    total = 0
    errors = 0

    try:
        from mycfg.locations import LOCATIONS as known_locations
    except ModuleNotFoundError as e:
        name = 'mycfg.locations'
        if e.name != name:
            raise e
        logger.warning(
            "'%s' isn't found. setting known_locations to empty list", name)
        known_locations = []

    # TODO tagging should be takeout-agnostic
    def tagger(dt: datetime, point: geopy.Point) -> Tag:
        '''
        Tag points with known locations (e.g. work/home/etc)
        '''
        for lat, lon, dist, tag in known_locations:
            # TODO use something more efficient?
            if geopy.distance.distance((lat, lon), point).m < dist:
                return tag
        return None

    for j in islice(ijson.items(fo, 'locations.item'), start, stop):
        dt = datetime.utcfromtimestamp(int(j["timestampMs"]) / 1000)
        if total % 10000 == 0:
            logger.info('processing item %d %s', total, dt)
        total += 1

        dt = pytz.utc.localize(dt)
        try:
            lat = float(j["latitudeE7"] / 10000000)
            lon = float(j["longitudeE7"] / 10000000)
            point = geopy.Point(
                lat, lon)  # kinda sanity check that coordinates are ok
        except Exception as e:
            logger.exception(e)
            errors += 1
            if float(errors) / total > 0.01:
                raise RuntimeError('too many errors! aborting')
            else:
                continue

        alt = j.get("altitude", None)
        tag = tagger(dt, point)  # TODO take accuracy into account??
        yield Location(dt=dt, lat=lat, lon=lon, alt=alt, tag=tag)
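The islice wrapper above is a simple way to page through a huge array; note the parser still has to scan everything before start. A minimal sketch of the same pattern:

import io
from itertools import islice
import ijson

buf = io.BytesIO(b'{"locations": [{"n": 0}, {"n": 1}, {"n": 2}, {"n": 3}]}')

# Parse lazily, but only keep items with index in [1, 3).
for item in islice(ijson.items(buf, 'locations.item'), 1, 3):
    print(item)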
Example no. 9
def load_batches(filename, model, batch_size=0):
    batch_index = 0
    batches = []
    item_count = 0
    index = 0
    group = ''
    progress = utils.ProgressText()

    batch_dims = load_batches_dims(filename, model, batch_size)

    with open(filename, 'r') as jsonfile:
        items = ijson.items(jsonfile, 'item')

        for index, item in enumerate(items):
            if batch_size > 0:
                batch_index = int(index / batch_size)

            # start of batch
            if batch_index >= len(batches):
                batches.append({'labels': []})

                for group in model.io_names:
                    batches[batch_index][group] = np.zeros(batch_dims[batch_index][group], dtype=np.float32)

            for group in model.io_names:
                to_array(
                    model.io_names[group],
                    item['data'][group],
                    batches[batch_index][group][item_count],
                    batch_dims[batch_index][group][1:]
                )

            if 'label' in item:
                batches[batch_index]['labels'].append(item['label'])
            else:
                batches[batch_index]['labels'].append('')

            item_count += 1

            # end of batch
            if batch_size == item_count:
                item_count = 0

            if ((index + 1) % 100) == 0:
                progress.text('import items: ' + str(index + 1) + '/' + str(batch_dims[batch_index][group][0]))

        progress.text('import items: ' + str(index + 1) + '/' + str(batch_dims[batch_index][group][0]))
        sys.stdout.write('\n')

    return batches
Example no. 10
def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
    # ijson version takes 25 seconds for 1M items (without processing)
    try:
        # pip3 install ijson cffi
        import ijson.backends.yajl2_cffi as ijson # type: ignore
    except ImportError:
        import warnings
        warnings.warn("Falling back to default ijson because 'cffi' backend isn't found. It's up to 2x faster, you might want to check it out")
        import ijson # type: ignore

    for d in ijson.items(fo, 'locations.item'):
        yield (
            int(d['timestampMs']),
            d['latitudeE7'],
            d['longitudeE7'],
        )
Example no. 11
def _import_cosmogony_to_pg(cosmogony_path):
    _pg_execute("""
        CREATE SCHEMA IF NOT EXISTS import;
        DROP TABLE IF EXISTS import.zones;

        CREATE TABLE IF NOT EXISTS import.zones(
            id bigint NOT NULL,
            parent bigint,
            name varchar,
            admin_level int,
            zone_type varchar,
            osm_id varchar,
            wikidata varchar,
            geometry geometry,
            PRIMARY KEY (id)
        )
        WITH (OIDS=FALSE);

        CREATE INDEX ON import.zones USING gist(geometry);

        CREATE INDEX ON import.zones (parent);
    """)

    print("Importing cosmogony to pg...")
    start = time.perf_counter()
    nb_zones = 0

    def print_timer():
        print(f"{nb_zones} zones imported in "
              f"{timedelta(seconds=(time.clock()-start))}")

    with open(cosmogony_path, "rb") as f:
        zones = ijson.items(f, "zones.item")

        with _pg_connect() as conn:
            with conn.cursor() as cur:
                for z in zones:
                    z["geometry"] = rapidjson.dumps(z.pop("geometry"),
                                                    number_mode=NM_DECIMAL
                                                    | NM_NATIVE)
                    cur.execute(SINGLE_INSERT, z)
                    nb_zones += 1
                    if nb_zones % 10000 == 0:
                        print_timer()

    print("Import done.")
    print_timer()
Example no. 12
def getResultsStats(file_name, dest):
    import ijson.backends.yajl2_cffi as ijson
    log.debug("getResultsStats()")
    file_path = "%s%s" % (defs.DIR_RESULTS, file_name)
    distributions = []
    with open(file_path, 'rb') as results:
        i = "1"
        while True:
            results.seek(0, 0)
            tmp = [d for d in ijson.items(results, "reads-distribution-%s.item" % i)]
            if len(tmp) == 0:
                break
            else:
                distributions.append((i, tmp[0]))
                i += "0"
    dest['distribution'] = distributions
    return dest
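Because ijson consumes the stream, each new prefix needs a rewind first, which is what the seek(0, 0) above does; every pass rescans the whole file. A minimal sketch of the same rescan loop over two hypothetical keys:

import io
import ijson

buf = io.BytesIO(b'{"reads-distribution-1": [10], "reads-distribution-10": [20]}')

for key in ('reads-distribution-1', 'reads-distribution-10'):
    buf.seek(0)  # rewind before every pass
    print(key, list(ijson.items(buf, key + '.item')))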
Example no. 14
def import_google(filename):
    logging.info('Importing from ' + filename)
    # Needs to be rb!
    with open(filename, 'rb') as f:
        data = ijson.items(f, 'locations.item')
        c = 0
        for o in data:
            c += 1
            p = (round(o['longitudeE7'] / 10000000, precision),
                 round(o['latitudeE7'] / 10000000, precision))
            d, t = tst_to_dt(int(o['timestampMs'][:-3]))
            make_history(p, d, t, False)
    logging.info(str(c) + ' items imported from ' + filename)
    logging.info('History size: ' + str(len(history)) + ' points')
    pickle.dump(history, open('history.pickle', 'wb'))
    write_js()
Example no. 15
def entries() -> Iterable[Entry]:
    inps = list(inputs())

    base: List[PathIsh] = ['arbtt-dump', '--format=json']

    cmds: List[List[PathIsh]]
    if len(inps) == 0:
        cmds = [base] # rely on default
    else:
        # otherwise, 'merge' them
        cmds = [base + ['--logfile', f] for f in inps]

    import ijson.backends.yajl2_cffi as ijson # type: ignore
    from subprocess import Popen, PIPE
    for cmd in cmds:
        with Popen(cmd, stdout=PIPE) as p:
            out = p.stdout; assert out is not None
            for json in ijson.items(out, 'item'):
                yield Entry(json=json)
Example no. 16
def useAttributeAndScatter(f, att, max_nums=10000):
    psis = []
    labels = []
    item_num = 0
    for program in ijson.items(f, 'programs.item'):
        api_call = get_api(get_calls_from_ast(program['ast']['_nodes']))
        if api_call != 'N/A':
            labels.append(api_call)
            if att not in program:
                return
            psis.append(program[att])
            item_num += 1

        if item_num > max_nums:
            break

    psis = np.array(psis)
    name = "RE" if att == "b2" else att
    fitTSEandplot(psis, labels, name)
Example no. 17
def Deserializer(stream, **options):
    """
    Deserialize a stream of JSON data using iterative ijson so we may not load the whole string into memory.
    """
    if isinstance(stream, (bytes, six.string_types)):
        raise TypeError(
            'Use iloaddata/ijson with streams only. For strings use plain loaddata/json.loads'
        )

    try:
        objects = ijson.items(stream, 'item')
        for obj in PythonDeserializer(objects, **options):
            yield obj
    except GeneratorExit:
        raise
    except Exception as e:
        # Map to deserializer error
        six.reraise(DeserializationError, DeserializationError(e),
                    sys.exc_info()[2])
Example no. 18
def get_account_stats(conf, silent=True):
    system_account_names = set(get_system_account_names(conf))
    vests = list()
    total_steem = 0
    account_names = set()

    if not silent and not YAJL2_CFFI_AVAILABLE:
        print(
            "Warning: could not load yajl, falling back to default backend for ijson."
        )

    with open(conf["snapshot_file"], "rb") as f:
        for acc in ijson.items(f, "accounts.item"):
            if acc["name"] in system_account_names:
                continue

            account_names.add(acc["name"])
            vests.append(satoshis(acc["vesting_shares"]))
            total_steem += satoshis(acc["balance"])

            if not silent:
                n = len(account_names)
                if n % 100000 == 0:
                    print("Accounts read:", n)

    initial_account_stats = {
        "account_names": account_names,
        "total_vests": sum(vests),
        "total_steem": total_steem
    }

    proportions = get_proportions(initial_account_stats, conf)
    max_vests_per_account = proportions["max_vests_per_account"]

    for (i, v) in enumerate(vests):
        vests[i] = min(max_vests_per_account, v)

    return {
        "account_names": account_names,
        "total_vests": sum(vests),
        "total_steem": total_steem
    }
Example no. 19
def load_batches_dims(filename, model, batch_size=0):
    batch_index = 0
    batch_dims = []
    item_count = 0
    index = 0
    progress = utils.ProgressText()

    with open(filename, 'r') as jsonfile:
        items = ijson.items(jsonfile, 'item')

        for index, item in enumerate(items):
            if batch_size > 0:
                batch_index = int(index / batch_size)

            # start of batch
            if batch_index >= len(batch_dims):
                batch_dims.append({})

            get_item_dims(item['data'], model, batch_dims[batch_index])

            item_count += 1

            # end of batch
            if batch_size == item_count:
                for group in model.io_names:
                    batch_dims[batch_index][group] = \
                        [item_count] + batch_dims[batch_index][group] + [get_io_len(model, group)]

                item_count = 0

            if ((index + 1) % 100) == 0:
                progress.text('count items: ' + str(index + 1))

        progress.text('count items: ' + str(index + 1))
        sys.stdout.write('\n')

        if item_count != 0:
            for group in model.io_names:
                batch_dims[batch_index][group] = \
                    [item_count] + batch_dims[batch_index][group] + [get_io_len(model, group)]

    return batch_dims
Example no. 20
    def get_data(self, f, **kwargs):
        location = kwargs.get('meetlocatie', None)
        if location and isinstance(location, MeetLocatie):
            location = location.name

        params = kwargs.get('parameter', self.parm)
        if params:
            if isinstance(params, Parameter):
                params = [params.name]
            elif isinstance(params, six.string_types):
                params = [params]

        # find data for location and one of the parameters (always one parameter per json file)
        dfs = {}
        for ts in ijson.items(f, 'results.item'):
            pname = ts['name']
            if pname not in params:
                # only 1 parameter per file
                break
            lcode = ts['location']['organisation_code']
            if not location or lcode == location:
                events = ts['events']
                data = []
                if events:
                    for e in events:
                        tmin = e['min']
                        tmax = e['max']
                        tgem = (tmin + tmax) / 2 if (tmin and tmax) else None
                        t = e['timestamp']
                        data.append(
                            (datetime.datetime.utcfromtimestamp(t / 1000),
                             tgem))
                if data:
                    df = pd.DataFrame.from_records(data,
                                                   index=['datum'],
                                                   columns=['datum', pname])
                    dfs[lcode] = df
                    if location:
                        # requested for a single location
                        break
        return dfs
Example no. 21
def split(args):
    f = open(args.input_file[0], 'rb')
    assert 0 < args.part < 100
    start, end = (args.part - 1) * args.step, args.part * args.step
    i = 0
    split_programs = []
    for program in ijson.items(f, 'programs.item'):
        print('Split part {} of size {} #Finished {} programs'.format(args.part, args.step, i), end='\r')
        if i == end:
            break
        if i < start:
            i += 1
            continue
        else:
            split_programs.append(program)
            i += 1

    print('')
    print("Writing to File")
    with open('{}-{:02d}.json'.format(args.input_file[0][:-5], args.part), 'w') as f:
        simplejson.dump({'programs': split_programs}, f, indent=2)
Example no. 22
    def load(cls, fp, override=None):
        """Load a generator.

        Parameters
        ----------
        fp : `file` or `str`
            Input file or file path.
        override : `dict` or `None`, optional
            Changes to loaded data (default: `None`).

        Returns
        -------
        `markovchain.base.MarkovBase`
            Loaded generator.
        """
        if isinstance(fp, str):
            with open(fp, 'r') as fp2:
                return cls.load(fp2, override)

        x = fp.read(1)
        fp.seek(0)

        if isinstance(x, str):
            data = json.load(fp)
        elif ijson is not None:
            try:
                data = next(ijson.items(fp, ''))
            except StopIteration:
                data = {}
        else:
            data = json.loads(fp.read().decode('utf-8'))

        if override is not None:
            extend(data, override)

        return cls(**data)
Example no. 24
    def calendars(self):
        calendars = requests.get(
            "http://api-tokyochallenge.odpt.org/api/v4/odpt:Calendar.json",
            params={"acl:consumerKey": self.apikey},
            timeout=30,
            stream=True)
        calendars.raise_for_status()
        calendars = ijson.items(calendars.raw, "item")

        # Get info on specific calendars
        calendar_dates = {}
        for calendar in calendars:
            calendar_id = calendar["owl:sameAs"].split(":")[1]
            if "odpt:day" in calendar and calendar["odpt:day"] != []:
                dates = [
                    datetime.strptime(i, "%Y-%m-%d").date()
                    for i in calendar["odpt:day"]
                ]
                dates = [
                    i for i in dates if self.startdate <= i <= self.enddate
                ]
                for date in dates:
                    if date not in calendar_dates: calendar_dates[date] = set()
                    calendar_dates[date].add(calendar_id)

        # Get info about holidays
        if self.startdate.year == self.enddate.year:
            holidays = _holidays(self.startdate.year)
        else:
            holidays = _holidays(self.startdate.year) | _holidays(
                self.enddate.year)

        # Open file
        buffer = open("gtfs/calendar_dates.txt",
                      mode="w",
                      encoding="utf8",
                      newline="")
        writer = csv.DictWriter(buffer,
                                GTFS_HEADERS["calendar_dates.txt"],
                                extrasaction="ignore")
        writer.writeheader()

        # Dump data
        for route, services in self.used_calendars.items():
            if self.verbose: print("\033[1A\033[KParsing calendars:", route)
            working_date = copy(self.startdate)
            while working_date <= self.enddate:
                active_services = []

                if calendar_dates.get(working_date,
                                      set()).intersection(services):
                    active_services = [
                        i for i in calendar_dates[working_date].intersection(
                            services)
                    ]

                elif working_date in holidays and "Holiday" in services:
                    active_services = ["Holiday"]

                elif working_date.isoweekday() == 7 and working_date not in holidays:
                    if "Sunday" in services: active_services = ["Sunday"]
                    elif "Holiday" in services: active_services = ["Holiday"]

                elif working_date.isoweekday() == 6 and working_date not in holidays and "Saturday" in services:
                    active_services = ["Saturday"]

                elif working_date.isoweekday() == 5 and working_date not in holidays and "Friday" in services:
                    active_services = ["Friday"]

                elif working_date.isoweekday() == 4 and working_date not in holidays and "Thursday" in services:
                    active_services = ["Thursday"]

                elif working_date.isoweekday() == 3 and working_date not in holidays and "Wednesday" in services:
                    active_services = ["Wednesday"]

                elif working_date.isoweekday() == 2 and working_date not in holidays and "Tuesday" in services:
                    active_services = ["Tuesday"]

                elif working_date.isoweekday() == 1 and working_date not in holidays and "Monday" in services:
                    active_services = ["Monday"]

                elif (working_date.isoweekday() >= 6 or working_date in holidays) and "SaturdayHoliday" in services:
                    active_services = ["SaturdayHoliday"]

                elif working_date.isoweekday() <= 5 and working_date not in holidays and "Weekday" in services:
                    active_services = ["Weekday"]

                if active_services:
                    for service in active_services:
                        writer.writerow({
                            "service_id": route + "/" + service,
                            "date": working_date.strftime("%Y%m%d"),
                            "exception_type": 1
                        })
                working_date += timedelta(days=1)

        calendars.close()
        buffer.close()
Example no. 25
    def trips(self):
        """Parse trips & stop_times"""
        # Some variables
        available_calendars = self._legal_calendars()

        # Get all trips
        trips = requests.get(
            "http://api-tokyochallenge.odpt.org/api/v4/odpt:BusTimetable.json",
            params={"acl:consumerKey": self.apikey},
            timeout=90,
            stream=True)
        trips.raise_for_status()
        trips = ijson.items(trips.raw, "item")

        # Open GTFS trips
        buffer_trips = open("gtfs/trips.txt",
                            mode="w",
                            encoding="utf8",
                            newline="")
        writer_trips = csv.DictWriter(buffer_trips,
                                      GTFS_HEADERS["trips.txt"],
                                      extrasaction="ignore")
        writer_trips.writeheader()

        buffer_times = open("gtfs/stop_times.txt",
                            mode="w",
                            encoding="utf8",
                            newline="")
        writer_times = csv.DictWriter(buffer_times,
                                      GTFS_HEADERS["stop_times.txt"],
                                      extrasaction="ignore")
        writer_times.writeheader()

        # Iterate over trips
        for trip in trips:
            operator = trip["odpt:operator"].split(":")[1]
            pattern_id = trip["odpt:busroutePattern"].split(":")[1]

            # Get route_id
            if pattern_id in self.pattern_map:
                route_id = self.pattern_map[pattern_id]

            else:
                if operator == "JRBusKanto":
                    route_id = operator + "." + \
                               pattern_id.split(".")[1] + "." + \
                               pattern_id.split(".")[2]

                else:
                    route_id = operator + "." + pattern_id.split(".")[1]

            trip_id = trip["owl:sameAs"].split(":")[1]
            calendar = trip["odpt:calendar"].split(":")[1]
            service_id = route_id + "/" + calendar

            if self.verbose: print("\033[1A\033[KParsing times:", trip_id)

            # Ignore non-parsed routes and non-active calendars
            if operator not in self.operators:
                continue

            if route_id not in self.parsed_routes:
                warn(
                    "\033[1mno route for pattern {}\033[0m".format(pattern_id))
                continue

            if calendar not in available_calendars:
                continue

            # Add calendar
            if route_id not in self.used_calendars:
                self.used_calendars[route_id] = set()
            self.used_calendars[route_id].add(calendar)

            # Ignore one-stop trips
            if len(trip["odpt:busTimetableObject"]) < 2:
                continue

            # Bus headsign
            headsigns = [
                i["odpt:destinationSign"]
                for i in trip["odpt:busTimetableObject"]
                if i.get("odpt:destinationSign") is not None
            ]

            if headsigns:
                trip_headsign = headsigns[0]

            else:
                last_stop_id = trip["odpt:busTimetableObject"][-1][
                    "odpt:busstopPole"].split(":")[1]

                if last_stop_id in self.stop_names:
                    trip_headsign = self.stop_names[last_stop_id]

                else:
                    trip_headsign = re.sub(r"(?!^)([A-Z][a-z]+)", r" \1",
                                           last_stop_id.split(".")[1])
                    warn("\033[1mno name for stop {}\033[0m".format(
                        last_stop_id))
                    self.stop_names[last_stop_id] = trip_headsign

            trip_headsign_en = self.english_strings.get(trip_headsign, "")

            # Non-step bus (wheelchair accessibility)
            if any(i.get("odpt:isNonStepBus") is False
                   for i in trip["odpt:busTimetableObject"]):
                wheelchair = "2"

            elif any(i.get("odpt:isNonStepBus") is True
                     for i in trip["odpt:busTimetableObject"]):
                wheelchair = "1"

            else:
                wheelchair = "0"

            # Do we start after midnight?
            prev_departure = _Time(0)
            if trip["odpt:busTimetableObject"][0].get("odpt:isMidnight",
                                                      False):
                first_time = trip["odpt:busTimetableObject"][0].get("odpt:departureTime") or \
                             trip["odpt:busTimetableObject"][0].get("odpt:arrivalTime")
                # If that's a night bus, and the trip starts before 6 AM
                # Add 24h to departure, as the trip starts "after-midnight"
                if int(first_time.split(":")[0]) < 6:
                    prev_departure = _Time(86400)

            # Filter stops to include only active stops
            trip["odpt:busTimetableObject"] = sorted(
                [
                    i for i in trip["odpt:busTimetableObject"]
                    if i["odpt:busstopPole"].split(":")[1] in self.valid_stops
                ],
                key=lambda i: i["odpt:index"])

            # Ignore trips with fewer than 2 stops
            if len(trip["odpt:busTimetableObject"]) <= 1:
                #warn("\033[1mno correct stops in trip {}\033[0m".format(trip_id))
                continue

            # Write to trips.txt
            writer_trips.writerow({
                "route_id": route_id,
                "trip_id": trip_id,
                "service_id": service_id,
                "trip_headsign": trip_headsign,
                "trip_pattern_id": pattern_id,
                "wheelchair_accessible": wheelchair
            })

            # Times
            for idx, stop_time in enumerate(trip["odpt:busTimetableObject"]):
                stop_id = stop_time["odpt:busstopPole"].split(":")[1]

                # Get time
                arrival = stop_time.get("odpt:arrivalTime") or stop_time.get(
                    "odpt:departureTime")
                departure = stop_time.get(
                    "odpt:departureTime") or stop_time.get("odpt:arrivalTime")

                if arrival: arrival = _Time.from_str(arrival)
                if departure: departure = _Time.from_str(departure)

                # Be sure arrival and departure exist
                if not (arrival and departure): continue

                # Fix for after-midnight trips. GTFS requires "24:23", while JSON data contains "00:23"
                if arrival < prev_departure: arrival += 86400
                if departure < arrival: departure += 86400
                prev_departure = copy(departure)

                # Can get on/off?
                # None → no info → falls back to True; but bool(None) == False, so we have to explicitly compare the value to False
                pickup = "1" if stop_time.get(
                    "odpt:CanGetOn") == False else "0"
                dropoff = "1" if stop_time.get(
                    "odpt:CanGetOff") == False else "0"

                writer_times.writerow({
                    "trip_id": trip_id,
                    "stop_sequence": idx,
                    "stop_id": stop_id,
                    "arrival_time": str(arrival),
                    "departure_time": str(departure),
                    "pickup_type": pickup,
                    "drop_off_type": dropoff
                })

        trips.close()
        buffer_trips.close()
        buffer_times.close()
Example no. 26
    def routes(self):
        patterns = requests.get(
            "http://api-tokyochallenge.odpt.org/api/v4/odpt:BusroutePattern.json",
            params={"acl:consumerKey": self.apikey},
            timeout=30,
            stream=True)
        patterns.raise_for_status()
        patterns = ijson.items(patterns.raw, "item")

        buffer = open("gtfs/routes.txt", mode="w", encoding="utf8", newline="")
        writer = csv.DictWriter(buffer,
                                GTFS_HEADERS["routes.txt"],
                                extrasaction="ignore")
        writer.writeheader()

        self.parsed_routes = set()

        for pattern in patterns:
            pattern_id = pattern["owl:sameAs"].split(":")[1]

            if type(pattern["odpt:operator"]) is list:
                operator = pattern["odpt:operator"][0].split(":")[1]
            else:
                operator = pattern["odpt:operator"].split(":")[1]

            if operator not in self.operators: continue
            if self.verbose:
                print("\033[1A\033[KParsing route patterns:", pattern_id)

            # Get route_id
            if "odpt:busroute" in pattern:
                route_id = pattern["odpt:busroute"].split(":")[1]

            else:
                if operator == "JRBusKanto":
                    route_id = operator + "." + \
                               pattern_id.split(".")[1] + "." + \
                               pattern_id.split(".")[2]

                else:
                    route_id = operator + "." + pattern_id.split(".")[1]

            # Map pattern → route_id, as BusTimetable references patterns instead of routes
            self.pattern_map[pattern_id] = route_id

            # Get color from bus_colors.csv
            route_code = pattern["dc:title"].split(" ")[
                0]  # Toei appends direction to BusroutePattern's dc:title
            route_color, route_text = self.operators[operator]

            # Output to GTFS
            if route_id not in self.parsed_routes:
                self.parsed_routes.add(route_id)
                writer.writerow({
                    "agency_id": operator,
                    "route_id": route_id,
                    "route_short_name": route_code,
                    "route_type": 3,
                    "route_color": route_color,
                    "route_text_color": route_text
                })

        patterns.close()
        buffer.close()
Example no. 27
def load(secure,hostname,url,schema,table,postdata,condition,verbose,rowcount):
  show("begin "+hostname+" "+url+" "+schema+" "+table+" "+(postdata or "")+" "+(condition or ""))
  if secure:
    address = "https://"+hostname+url
  else:
    address = "http://"+hostname+url
  show("load from "+address)

  reqheaders = {'Content-Type': 'application/json'}
  # api credentials from env vars
  if os.getenv("API_USERNAME"):
    show("using authentication")
    apiuser = os.getenv("API_USERNAME")
    apipass = os.getenv("API_PASSWORD")
    reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(apiuser+":"+apipass)

  # automatic POST with (post)data
  print("value used for , -r, --rowcount=", rowcount)
  request = urllib2.Request(address, data=postdata, headers=reqheaders)
  try:
    response = urllib2.urlopen(request)
  except httplib.IncompleteRead as e:
    show('IncompleteRead exception.')
    show('Received: %d bytes'%(len(e.partial)))
    sys.exit(2)
  except urllib2.HTTPError as e:
    show('The server couldn\'t fulfill the request.')
    show('Error code: %d'%(e.code))
    sys.exit(2)
  except urllib2.URLError as e:
    show('We failed to reach a server.')
    show('Reason: %s'%(e.reason))
    sys.exit(2)
  else:
    # everything is fine
    show("api call OK")

  # remove data conditionally, otherwise empty
  # merge operation could be considered here...
  if condition:
    show("remove from %s.%s with condition '%s'"%(schema,table,condition))
    dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))
  else:
    show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table)

  show("insert data")
  cnt=0
  manycount = 0
  rows = []

  for row in ijson.items(response,'item'):
        cnt+=1
        manycount+=1
        # show some sign of being alive
        if cnt%100 == 0:
          sys.stdout.write('.')
          sys.stdout.flush()
        if cnt%1000 == 0:
          show("-- %d" % (cnt))
        if verbose: show("%d -- %s"%(cnt,row))

        # find out which columns to use on insert
        dboperator.resetcolumns(row)

        # flatten arrays/lists
        for col in row:
            if type(row[col]) is list:
                row[col] = ''.join(map(str,json.dumps(row[col])))
        rows.append(row)
        if cnt == 1:
            dboperator.insert(address,schema,table,row)
            manycount = 0
            rows = []
        if cnt > 1:
            if manycount == rowcount:
                dboperator.insert(address,schema,table,rows)
                manycount = 0
                rows = []
  if len(rows) <= manycount and len(rows) > 0:
      dboperator.insert(address,schema,table,rows)
      rows = []
      manycount = 0

  show("wrote %d"%(cnt))
  show("ready")
Example no. 28
def load_json():
    objects = ijson.items(get_stdin(), "")
    ds.append(next(objects))
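An empty prefix matches the JSON root itself, so ijson.items(fp, "") yields exactly one item: the whole document (numbers arrive as int or Decimal). A minimal sketch:

import io
import ijson

doc = next(ijson.items(io.BytesIO(b'{"a": 1, "b": [2, 3]}'), ''))
print(doc)  # {'a': 1, 'b': [2, 3]}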
Example no. 29
def ijsonLoad(filename,
              tagsGidRange=None,
              connsGidRange=None,
              loadTags=True,
              loadConns=True,
              tagFormat=None,
              connFormat=None,
              saveTags=None,
              saveConns=None):
    """
    Function for/to <short description of `netpyne.sim.load.ijsonLoad`>

    Parameters
    ----------
    filename : <type>
        <Short description of filename>
        **Default:** *required*

    tagsGidRange : <``None``?>
        <Short description of tagsGidRange>
        **Default:** ``None``
        **Options:** ``<option>`` <description of option>

    connsGidRange : <``None``?>
        <Short description of connsGidRange>
        **Default:** ``None``
        **Options:** ``<option>`` <description of option>

    loadTags : bool
        <Short description of loadTags>
        **Default:** ``True``
        **Options:** ``<option>`` <description of option>

    loadConns : bool
        <Short description of loadConns>
        **Default:** ``True``
        **Options:** ``<option>`` <description of option>

    tagFormat : <``None``?>
        <Short description of tagFormat>
        **Default:** ``None``
        **Options:** ``<option>`` <description of option>

    connFormat : <``None``?>
        <Short description of connFormat>
        **Default:** ``None``
        **Options:** ``<option>`` <description of option>

    saveTags : <``None``?>
        <Short description of saveTags>
        **Default:** ``None``
        **Options:** ``<option>`` <description of option>

    saveConns : <``None``?>
        <Short description of saveConns>
        **Default:** ``None``
        **Options:** ``<option>`` <description of option>


    """

    # requires: 1) pip install ijson, 2) brew install yajl
    from .. import sim
    import ijson.backends.yajl2_cffi as ijson
    import json
    from time import time

    tags, conns = {}, {}

    if connFormat:
        conns['format'] = connFormat
    if tagFormat:
        tags['format'] = tagFormat

    with open(filename, 'rb') as fd:
        start = time()
        print('Loading data ...')
        objs = ijson.items(fd, 'net.cells.item')
        if loadTags and loadConns:
            print('Storing tags and conns ...')
            for cell in objs:
                if tagsGidRange == None or cell['gid'] in tagsGidRange:
                    print('Cell gid: %d' % (cell['gid']))
                    if tagFormat:
                        tags[int(cell['gid'])] = [
                            cell['tags'][param] for param in tagFormat
                        ]
                    else:
                        tags[int(cell['gid'])] = cell['tags']
                    if connsGidRange == None or cell['gid'] in connsGidRange:
                        if connFormat:
                            conns[int(cell['gid'])] = [[
                                conn[param] for param in connFormat
                            ] for conn in cell['conns']]
                        else:
                            conns[int(cell['gid'])] = cell['conns']
        elif loadTags:
            print('Storing tags ...')
            if tagFormat:
                tags.update({
                    int(cell['gid']):
                    [cell['tags'][param] for param in tagFormat]
                    for cell in objs
                    if tagsGidRange == None or cell['gid'] in tagsGidRange
                })
            else:
                tags.update({
                    int(cell['gid']): cell['tags']
                    for cell in objs
                    if tagsGidRange == None or cell['gid'] in tagsGidRange
                })
        elif loadConns:
            print('Storing conns...')
            if connFormat:
                conns.update({
                    int(cell['gid']): [[conn[param] for param in connFormat]
                                       for conn in cell['conns']]
                    for cell in objs
                    if connsGidRange == None or cell['gid'] in connsGidRange
                })
            else:
                conns.update({
                    int(cell['gid']): cell['conns']
                    for cell in objs
                    if connsGidRange == None or cell['gid'] in connsGidRange
                })

        print('time elapsed (s): ', time() - start)

    tags = utils.decimalToFloat(tags)
    conns = utils.decimalToFloat(conns)

    if saveTags and tags:
        outFilename = saveTags if isinstance(
            saveTags, basestring) else filename[:-4] + '_tags.json'
        print('Saving tags to %s ...' % (outFilename))
        sim.saveJSON(outFilename, {'tags': tags})
    if saveConns and conns:
        outFilename = saveConns if isinstance(
            saveConns, basestring) else filename[:-4] + '_conns.json'
        print('Saving conns to %s ...' % (outFilename))
        sim.saveJSON(outFilename, {'conns': conns})

    return tags, conns
Example no. 30
def update_accounts(account_stats, conf, keydb, silent=True):
    crea_max_authority_membership = conf.get("crea_max_authority_membership",
                                             CREA_MAX_AUTHORITY_MEMBERSHIP)
    crea_address_prefix = conf.get("crea_address_prefix", CREA_ADDRESS_PREFIX)
    system_account_names = set(get_system_account_names(conf))
    account_names = account_stats["account_names"]
    num_accounts = len(account_names)
    porter_wif = keydb.get_privkey("porter")
    tnman = conf["accounts"]["manager"]["name"]
    accounts_updated = 0

    with open(conf["snapshot_file"], "rb") as f:
        for a in ijson.items(f, "accounts.item"):
            if a["name"] in system_account_names:
                continue

            cur_owner_auth = a["owner"]
            new_owner_auth = cur_owner_auth.copy()
            cur_active_auth = a["active"]
            new_active_auth = cur_active_auth.copy()
            cur_posting_auth = a["posting"]
            new_posting_auth = cur_posting_auth.copy()

            # filter to only include existing accounts
            for aw in cur_owner_auth["account_auths"][:(
                    crea_max_authority_membership - 1)]:
                if (aw[0] not in account_names) or (aw[0]
                                                    in system_account_names):
                    new_owner_auth["account_auths"].remove(aw)
            for aw in cur_active_auth["account_auths"][:(
                    crea_max_authority_membership - 1)]:
                if (aw[0] not in account_names) or (aw[0]
                                                    in system_account_names):
                    new_active_auth["account_auths"].remove(aw)
            for aw in cur_posting_auth["account_auths"][:(
                    crea_max_authority_membership - 1)]:
                if (aw[0] not in account_names) or (aw[0]
                                                    in system_account_names):
                    new_posting_auth["account_auths"].remove(aw)

            # add tnman to account_auths
            new_owner_auth["account_auths"].append(
                [tnman, cur_owner_auth["weight_threshold"]])
            new_active_auth["account_auths"].append(
                [tnman, cur_active_auth["weight_threshold"]])
            new_posting_auth["account_auths"].append(
                [tnman, cur_posting_auth["weight_threshold"]])

            # substitute prefix for key_auths
            new_owner_auth["key_auths"] = [[
                crea_address_prefix + k[3:], w
            ] for k, w in new_owner_auth["key_auths"]
                                           [:crea_max_authority_membership]]
            new_active_auth["key_auths"] = [[
                crea_address_prefix + k[3:], w
            ] for k, w in new_active_auth["key_auths"]
                                            [:crea_max_authority_membership]]
            new_posting_auth["key_auths"] = [[
                crea_address_prefix + k[3:], w
            ] for k, w in new_posting_auth["key_auths"]
                                             [:crea_max_authority_membership]]

            ops = [{
                "type": "account_update_operation",
                "value": {
                    "account": a["name"],
                    "owner": new_owner_auth,
                    "active": new_active_auth,
                    "posting": new_posting_auth,
                    "memo_key": "TST" + a["memo_key"][3:],
                    "json_metadata": a["json_metadata"],
                }
            }]

            accounts_updated += 1
            if not silent:
                if accounts_updated % 100000 == 0:
                    print("Accounts updated:", accounts_updated)
                    print(
                        "\t", '%.2f%% complete' %
                        (accounts_updated / num_accounts * 100.0))

            yield {"operations": ops, "wif_sigs": [porter_wif]}

    if not silent:
        print("Accounts updated:", accounts_updated)
        print("\t100.00%% complete")
Example no. 31
def create_accounts(account_stats, conf, keydb, silent=True):
    crea_address_prefix = conf.get("crea_address_prefix", CREA_ADDRESS_PREFIX)
    system_account_names = set(get_system_account_names(conf))
    proportions = get_proportions(account_stats, conf, silent)
    min_vesting_per_account = proportions["min_vesting_per_account"]
    vest_conversion_factor = proportions["vest_conversion_factor"]
    crea_conversion_factor = proportions["crea_conversion_factor"]
    account_names = account_stats["account_names"]
    num_accounts = len(account_names)
    porter = conf["accounts"]["porter"]["name"]
    porter_wif = keydb.get_privkey("porter")
    create_auth = {
        "account_auths": [["porter", 1]],
        "key_auths": [],
        "weight_threshold": 1
    }
    accounts_created = 0

    with open(conf["snapshot_file"], "rb") as f:
        for a in ijson.items(f, "accounts.item"):
            if a["name"] in system_account_names:
                continue

            vesting_amount = (satoshis(a["vesting_shares"]) *
                              vest_conversion_factor) // DENOM
            transfer_amount = (satoshis(a["balance"]) *
                               crea_conversion_factor) // DENOM
            name = a["name"]
            vesting_amount = max(vesting_amount, min_vesting_per_account)

            ops = [{
                "type": "account_create_operation",
                "value": {
                    "fee": {
                        "amount": "0",
                        "precision": 3,
                        "nai": "@@000000021"
                    },
                    "creator": porter,
                    "new_account_name": name,
                    "owner": create_auth,
                    "active": create_auth,
                    "posting": create_auth,
                    "memo_key": crea_address_prefix + a["memo_key"][3:],
                    "json_metadata": "",
                }
            }, {
                "type": "transfer_to_vesting_operation",
                "value": {
                    "from": porter,
                    "to": name,
                    "amount": amount(vesting_amount),
                }
            }]
            if transfer_amount > 0:
                ops.append({
                    "type": "transfer_operation",
                    "value": {
                        "from": porter,
                        "to": name,
                        "amount": amount(transfer_amount),
                        "memo": "Ported balance",
                    }
                })

            accounts_created += 1
            if not silent:
                if accounts_created % 100000 == 0:
                    print("Accounts created:", accounts_created)
                    print(
                        "\t", '%.2f%% complete' %
                        (accounts_created / num_accounts * 100.0))

            yield {"operations": ops, "wif_sigs": [porter_wif]}

    if not silent:
        print("Accounts created:", accounts_created)
        print("\t100.00%% complete")
Example no. 32
        except IOError:
            print('ERROR: could not open file ' + fingerprintsfile_old_name)
            exit()
    else:
        print(
            'INFO: no old fingerprints file name passed, starting from scratch'
        )
        fingerprints_old = dict()

    fingerprints_new = dict()
    idSet = set()  # to check uniqueness. Faster than using a list or dict.
    duplicateIds = list()

    # CUSTOM IMPLEMENTATION FROM HERE

    jsonObjects = ijson.items(fullfile_new, 'messages.item.markets.item')

    deltafile.write('{"markets":[\n')

    objCount = 0
    deltacount = 0
    marketcount = 0
    # Half-streaming approach: parse the complete JSON of one market at a time
    # and iterate over the products inside it (full streaming would be
    # considerably more complex).
    for market in jsonObjects:
        prodcount = 0
        if marketcount > 0: deltafile.write('\n,')
        marketcount += 1

        marketId = str(market['wwIdent'])
Example no. 33
import ijson.backends.yajl2_cffi as ijson
import os
import re
import time

import pandas as pd

start = time.time()
PATH = "../data"
records = list()
for filename in sorted(os.listdir(PATH)):
    try:
        with open(os.path.join(PATH, filename), 'rb') as json_file:
            edges = next(ijson.items(json_file,
                                     "data.feedback.display_comments.edges"))
        for edge in edges:
            records.append({
                'id': edge['node']['id'],
                'author_id': edge['node']['author']['id'],
                'author_name': edge['node']['author']['name'],
                'author_gender': edge['node']['author'].get('gender'),
                'timestamp': edge['node']['created_time'],
                'reactions': edge['node']['feedback']['reactors']['count'],
                'url': edge['node']['url'],
                'comment': edge['node']['body']['text'] if edge['node']['body'] else None,
            })
    except Exception as e:
        print(f"Skipping {filename}, {e}")
        continue
Example no. 34
import cv2
import decimal
import json
import ijson.backends.yajl2_cffi as ijson
from sklearn_theano.feature_extraction import OverfeatTransformer

tr = OverfeatTransformer(output_layers=[8])


class DecimalEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, decimal.Decimal):
            return float(o)
        return super(DecimalEncoder, self).default(o)


with open('../workspace/ds.json') as inh:
    with open('../workspace/ds_deep.json', 'w') as outh:
        ds = ijson.items(inh, 'item')
        outh.write('[')

        for i, item in enumerate(ds):
            print 'running', i + 1
            if i > 0:
                outh.write(',')
            img = cv2.imread('set1/' + item['file'])
            img = cv2.resize(img, (231, 231))
            item['deep'] = tr.transform(img)[0].tolist()
            json.dump(item, outh, cls=DecimalEncoder)

        outh.write(']')
Example no. 36
    def stops(self):
        """Parse stops"""
        # Get list of stops
        stops = requests.get(
            "http://api-tokyochallenge.odpt.org/api/v4/odpt:BusstopPole.json",
            params={"acl:consumerKey": self.apikey},
            timeout=30,
            stream=True)
        stops.raise_for_status()
        stops = ijson.items(stops.raw, "item")

        # Open files
        buffer = open("gtfs/stops.txt", mode="w", encoding="utf8", newline="")
        writer = csv.DictWriter(buffer,
                                GTFS_HEADERS["stops.txt"],
                                extrasaction="ignore")
        writer.writeheader()

        broken_stops_buff = open("broken_stops.csv",
                                 mode="w",
                                 encoding="utf8",
                                 newline="")
        broken_stops_wrtr = csv.writer(broken_stops_buff)
        broken_stops_wrtr.writerow(
            ["stop_id", "stop_name", "stop_name_en", "stop_code"])

        # Iterate over stops
        for stop in stops:
            stop_id = stop["owl:sameAs"].split(":")[1]
            stop_code = stop.get("odpt:busstopPoleNumber", "")
            stop_name = stop["dc:title"]
            stop_name_en = self.carmel_to_title(stop_id.split(".")[1])

            if self.verbose: print("\033[1A\033[KParsing stops:", stop_id)

            self.stop_names[stop_id] = stop_name

            # Stop name translation
            if stop_name_en: self.english_strings[stop_name] = stop_name_en

            # Stop operators
            if type(stop["odpt:operator"]) is list:
                operators = [i.split(":")[1] for i in stop["odpt:operator"]]
            else:
                operators = [stop["odpt:operator"].split(":")[1]]

            # Ignore stops that belong to ignored agencies
            if not set(operators).intersection(self.operators):
                continue

            # Stop position (may be missing from the feed)
            stop_lat = stop.get("geo:lat")
            stop_lon = stop.get("geo:long")

            # Output to GTFS or to incorrect stops
            if stop_lat and stop_lon:
                self.valid_stops.add(stop_id)
                writer.writerow({
                    "stop_id": stop_id,
                    "stop_code": stop_code,
                    "zone_id": stop_id,
                    "stop_name": stop_name,
                    "stop_lat": stop_lat,
                    "stop_lon": stop_lon,
                })

            else:
                broken_stops_wrtr.writerow(
                    [stop_id, stop_name, stop_name_en, stop_code])

        stops.close()
        buffer.close()
Example no. 37
totalComments = 5730430
subreddits = json.load(open('subreddits.json'))

# Keep the 20 biggest subreddits
subreddits.sort(key=lambda s: s['subscribers'], reverse=True)
subreddits = subreddits[:20]

totalParsedComments = 0
parsingStartTime = time.time()

# Parsing
for subreddit in subreddits:
    subreddit['usersDict'] = {}
    print('Loading', subreddit['name'])
    with open('data/' + subreddit['name'] + '.json', 'rb') as data:
        for comment in ijson.items(data, 'item'):
            user = getUser(subreddit, comment['author'])
            user['commentsSentiments'].append(comment['sentiment']['compound'])
            user['sentimentAverage'] += comment['sentiment']['compound']
            user['scoreAverage'] += comment['score']
            user['lengthAverage'] += comment['textlen']
            user['commentCount'] += 1
            if comment['sentiment']['compound'] <= -0.5:
                user['negativeCommentCount'] += 1
            elif (comment['sentiment']['compound'] > -0.5
                  and comment['sentiment']['compound'] < 0.5):
                user['neutralCommentCount'] += 1
            elif comment['sentiment']['compound'] >= 0.5:
                user['positiveCommentCount'] += 1

            totalParsedComments += 1
Example no. 38
    def get_parameters(self, f):
        params = {}
        for obs in ijson.items(f, 'results.item.observation_type'):
            params[obs['code']] = {
                'description': obs['parameter_short_display_name'],
                'unit': obs['referenced_unit_short_display_name'],
            }
            break  # one single parameter per file
        return params