Code example #1
    def related_topics(self):
        """Request data from Google's Related Topics section and return a dictionary of dataframes

        If no top and/or rising related topics are found, the value for the key "top" and/or "rising" will be None
        """

        # make the request
        related_payload = dict()
        result_dict = dict()
        for request_json in self.related_topics_widget_list:
            # ensure we know which keyword we are looking at rather than relying on order
            if request_json['request']['restriction'].get(
                    "complexKeywordsRestriction") is not None:
                kw = request_json['request']['restriction'][
                    'complexKeywordsRestriction']['keyword'][0]['value']
            else:
                kw = ""

            # convert to string as requests will mangle
            related_payload['req'] = json.dumps(request_json['request'])
            related_payload['token'] = request_json['token']
            related_payload['tz'] = self.tz

            # parse the returned json
            req_json = self._get_data(
                url=TrendReq.RELATED_QUERIES_URL,
                method=TrendReq.GET_METHOD,
                trim_chars=5,
                params=related_payload,
            )

            # top topics
            try:
                top_list = req_json['default']['rankedList'][0][
                    'rankedKeyword']
                df_top = pd.DataFrame(
                    [nested_to_record(d, sep='_') for d in top_list])
            except KeyError:
                # in case no top topics are found, the lines above will throw a KeyError
                df_top = None

            # rising topics
            try:
                rising_list = req_json['default']['rankedList'][1][
                    'rankedKeyword']
                df_rising = pd.DataFrame(
                    [nested_to_record(d, sep='_') for d in rising_list])
            except KeyError:
                # in case no rising topics are found, the lines above will throw a KeyError
                df_rising = None

            result_dict[kw] = {'rising': df_rising, 'top': df_top}
        return result_dict
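
All of the examples on this page assume nested_to_record is already in scope. It is a pandas-internal helper rather than public API, and its module path has moved between releases, so a version-tolerant import plus a minimal call looks roughly like this (a sketch; the internal location may change again):

# nested_to_record lives in pandas internals; the path differs by version
try:
    from pandas.io.json._normalize import nested_to_record  # pandas >= 0.25
except ImportError:
    from pandas.io.json.normalize import nested_to_record   # older pandas

flat = nested_to_record({'a': {'b': 1, 'c': 2}}, sep='_')
print(flat)  # {'a_b': 1, 'a_c': 2}
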
Code example #2
File: test_normalize.py Project: JLL-Benson/CHN_DQ
 def test_nonetype_multiple_levels(self):
     # GH21158: if an inner-level JSON key has a null value, make sure
     # new_d.pop isn't called twice, which would raise
     data = {
         "id": None,
         "location": {
             "id": None,
             "country": {
                 "id": None,
                 "state": {
                     "id": None,
                     "town.info": {
                         "region": None,
                         "x": 49.151580810546875,
                         "y": -33.148521423339844,
                         "z": 27.572303771972656}}}
         }
     }
     result = nested_to_record(data)
     expected = {
         'id': None,
         'location.id': None,
         'location.country.id': None,
         'location.country.state.id': None,
         'location.country.state.town.info.region': None,
         'location.country.state.town.info.x': 49.151580810546875,
         'location.country.state.town.info.y': -33.148521423339844,
         'location.country.state.town.info.z': 27.572303771972656}
     assert result == expected
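
One detail this test exposes: the inner key "town.info" already contains the default separator, so the flattened key is indistinguishable from genuinely deeper nesting. A tiny illustration of that ambiguity:

nested_to_record({'a.b': {'c': 1}})       # {'a.b.c': 1}
nested_to_record({'a': {'b': {'c': 1}}})  # {'a.b.c': 1}, the same key
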
Code example #3
def calc_metrics_classification(target,
                                predictions,
                                target_scores=None,
                                jsd_score=None):

    if target_scores is not None:
        assert predictions.squeeze(1).shape == target_scores.shape
        tvdist = tvd(predictions.squeeze(1), target_scores)

    if predictions.shape[-1] == 1:
        predictions = predictions[:, 0]
        predictions = np.array([1 - predictions, predictions]).T

    predict_classes = np.argmax(predictions, axis=-1)

    if len(np.unique(target)) < 4:
        rep = nested_to_record(classification_report(target,
                                                     predict_classes,
                                                     output_dict=True),
                               sep='/')
    else:
        rep = {}
    rep.update({'accuracy': accuracy_score(target, predict_classes)})

    if jsd_score:
        rep.update({'js_divergence': jsd_score})
    if target_scores is not None:
        rep.update({'TVD': tvdist})

    if predictions.shape[-1] == 2:
        rep.update({'roc_auc': roc_auc_score(target, predictions[:, 1])})
        rep.update(
            {"pr_auc": average_precision_score(target, predictions[:, 1])})
    return rep
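
The nested_to_record call here flattens sklearn's classification_report dict, whose per-class metrics sit one level deep. A minimal sketch of just that step (tvd is a project-specific helper not shown on this page):

from sklearn.metrics import classification_report

rep = classification_report([0, 1, 1], [0, 1, 0], output_dict=True)
flat = nested_to_record(rep, sep='/')
# keys look like '0/precision', '1/recall', 'macro avg/f1-score', 'accuracy', ...
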
Code example #4
File: test_normalize.py Project: zyazxr/pandas
    def test_one_level_deep_flattens(self):
        data = dict(flat1=1, dict1=dict(c=1, d=2))

        result = nested_to_record(data)
        expected = {"dict1.c": 1, "dict1.d": 2, "flat1": 1}

        assert result == expected
Code example #5
File: test_normalize.py Project: birgirp/golfvaktin
    def test_donot_drop_nonevalues(self):
        # GH21356
        data = [{
            'info': None,
            'author_name': {
                'first': 'Smith',
                'last_name': 'Appleseed'
            }
        }, {
            'info': {
                'created_at': '11/08/1993',
                'last_updated': '26/05/2012'
            },
            'author_name': {
                'first': 'Jane',
                'last_name': 'Doe'
            }
        }]
        result = nested_to_record(data)
        expected = [{
            'info': None,
            'author_name.first': 'Smith',
            'author_name.last_name': 'Appleseed'
        }, {
            'author_name.first': 'Jane',
            'author_name.last_name': 'Doe',
            'info.created_at': '11/08/1993',
            'info.last_updated': '26/05/2012'
        }]

        assert result == expected
Code example #6
File: test_normalize.py Project: BinEP/MHealthTest
    def test_nonetype_dropping(self):
        # GH20030: Checks that None values are dropped in nested_to_record
        # to prevent additional columns of nans when passed to DataFrame
        data = [{
            'info': None,
            'author_name': {
                'first': 'Smith',
                'last_name': 'Appleseed'
            }
        }, {
            'info': {
                'created_at': '11/08/1993',
                'last_updated': '26/05/2012'
            },
            'author_name': {
                'first': 'Jane',
                'last_name': 'Doe'
            }
        }]
        result = nested_to_record(data)
        expected = [{
            'author_name.first': 'Smith',
            'author_name.last_name': 'Appleseed'
        }, {
            'author_name.first': 'Jane',
            'author_name.last_name': 'Doe',
            'info.created_at': '11/08/1993',
            'info.last_updated': '26/05/2012'
        }]

        assert result == expected
Code example #7
def calc_metrics_multilabel(target, predictions):
    rep = {}
    target = np.array(target)
    nlabels = target.shape[1]
    predict_classes = np.where(predictions > 0.5, 1, 0)
    for i in range(nlabels):
        rep_i = nested_to_record(classification_report(target[:, i],
                                                       predict_classes[:, i],
                                                       output_dict=True),
                                 sep='/')
        rep_i.update(
            {'accuracy': accuracy_score(target[:, i], predict_classes[:, i])})
        rep_i.update(
            {'roc_auc': roc_auc_score(target[:, i], predictions[:, i])})
        rep_i.update({
            "pr_auc":
            average_precision_score(target[:, i], predictions[:, i])
        })
        for k in list(rep_i.keys()):
            rep_i['label_' + str(i) + '/' + k] = rep_i[k]
            del rep_i[k]

        rep.update(rep_i)

    macro_roc_auc = np.mean([v for k, v in rep.items() if 'roc_auc' in k])
    macro_pr_auc = np.mean([v for k, v in rep.items() if 'pr_auc' in k])

    rep['macro_roc_auc'] = macro_roc_auc
    rep['macro_pr_auc'] = macro_pr_auc

    return rep
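
A hedged usage sketch with toy inputs (targets as a 0/1 matrix, predictions as per-label probabilities); the 'label_i/' prefixes come from the key-renaming loop above:

import numpy as np

target = [[1, 0], [0, 1], [1, 1]]                             # 3 samples, 2 labels
predictions = np.array([[0.9, 0.2], [0.1, 0.8], [0.7, 0.6]])  # per-label scores
rep = calc_metrics_multilabel(target, predictions)
# keys: 'label_0/accuracy', 'label_0/roc_auc', ..., 'macro_roc_auc', 'macro_pr_auc'
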
Code example #8
File: test_normalize.py Project: zyazxr/pandas
 def test_nonetype_multiple_levels(self):
     # GH21158: if an inner-level JSON key has a null value, make sure
     # new_d.pop isn't called twice, which would raise
     data = {
         "id": None,
         "location": {
             "id": None,
             "country": {
                 "id": None,
                 "state": {
                     "id": None,
                     "town.info": {
                         "region": None,
                         "x": 49.151580810546875,
                         "y": -33.148521423339844,
                         "z": 27.572303771972656,
                     },
                 },
             },
         },
     }
     result = nested_to_record(data)
     expected = {
         "id": None,
         "location.id": None,
         "location.country.id": None,
         "location.country.state.id": None,
         "location.country.state.town.info.region": None,
         "location.country.state.town.info.x": 49.151580810546875,
         "location.country.state.town.info.y": -33.148521423339844,
         "location.country.state.town.info.z": 27.572303771972656,
     }
     assert result == expected
Code example #9
File: test_normalize.py Project: birgirp/golfvaktin
    def test_one_level_deep_flattens(self):
        data = dict(flat1=1, dict1=dict(c=1, d=2))

        result = nested_to_record(data)
        expected = {'dict1.c': 1, 'dict1.d': 2, 'flat1': 1}

        assert result == expected
Code example #10
    def test_flat_stays_flat(self):
        recs = [dict(flat1=1, flat2=2),
                dict(flat1=3, flat2=4),
                ]

        result = nested_to_record(recs)
        expected = recs
        assert result == expected
Code example #11
def collect_gear_config(gear_id, client):
    '''
    Collects the gear's configuration and inputs
    '''
    gear = client.get_gear(gear_id)
    name = gear['gear']['name']
    label = gear['gear']['label']
    description = gear['gear']['description']
    inputs = nested_to_record(gear.gear.inputs)
    config = nested_to_record(gear.get_default_config())
    return ({
        'name': name,
        'inputs': inputs,
        'config': config,
        'label': label,
        'description': description
    })
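
A hedged usage sketch, assuming Flywheel CLI credentials are configured; the gear id below is a hypothetical placeholder:

import flywheel

fw = flywheel.Client()                # needs configured Flywheel credentials
gear_id = '5db0845e69d4f3001f16e9c1'  # hypothetical gear ObjectId
cfg = collect_gear_config(gear_id, fw)
print(cfg['name'], cfg['label'])
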
Code example #12
File: test_normalize.py Project: JLL-Benson/CHN_DQ
    def test_flat_stays_flat(self):
        recs = [dict(flat1=1, flat2=2),
                dict(flat1=3, flat2=4),
                ]

        result = nested_to_record(recs)
        expected = recs
        assert result == expected
Code example #13
File: request.py Project: GeneralMills/pytrends
    def related_topics(self):
        """Request data from Google's Related Topics section and return a dictionary of dataframes

        If no top and/or rising related topics are found, the value for the key "top" and/or "rising" will be None
        """

        # make the request
        related_payload = dict()
        result_dict = dict()
        for request_json in self.related_topics_widget_list:
            # ensure we know which keyword we are looking at rather than relying on order
            kw = request_json['request']['restriction']['complexKeywordsRestriction']['keyword'][0]['value']
            # convert to string as requests will mangle
            related_payload['req'] = json.dumps(request_json['request'])
            related_payload['token'] = request_json['token']
            related_payload['tz'] = self.tz

            # parse the returned json
            req_json = self._get_data(
                url=TrendReq.RELATED_QUERIES_URL,
                method=TrendReq.GET_METHOD,
                trim_chars=5,
                params=related_payload,
            )

            # top topics
            try:
                top_list = req_json['default']['rankedList'][0]['rankedKeyword']
                df_top = pd.DataFrame([nested_to_record(d, sep='_') for d in top_list])
            except KeyError:
                # in case no top topics are found, the lines above will throw a KeyError
                df_top = None

            # rising topics
            try:
                rising_list = req_json['default']['rankedList'][1]['rankedKeyword']
                df_rising = pd.DataFrame([nested_to_record(d, sep='_') for d in rising_list])
            except KeyError:
                # in case no rising topics are found, the lines above will throw a KeyError
                df_rising = None

            result_dict[kw] = {'rising': df_rising, 'top': df_top}
        return result_dict
Code example #14
File: test_normalize.py Project: JLL-Benson/CHN_DQ
    def test_one_level_deep_flattens(self):
        data = dict(flat1=1,
                    dict1=dict(c=1, d=2))

        result = nested_to_record(data)
        expected = {'dict1.c': 1,
                    'dict1.d': 2,
                    'flat1': 1}

        assert result == expected
Code example #15
def flatten_data(data, json_column='data'):
    json_data = data.pop(json_column)
    # this gets at any nested dicts as well
    flat_data = pandas.DataFrame(nested_to_record(json_data))
    # rename the columns so they can be un-flattened later
    flat_data.columns = [
        '{0}.{1}'.format(json_column, i) for i in flat_data.columns.values
    ]
    other_data = pandas.DataFrame(data)
    return pandas.concat([other_data, flat_data], axis=1)
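
A quick sketch of what flatten_data expects and returns, with hypothetical input; note that data.pop mutates the caller's dict:

import pandas

records = {
    'id': [1, 2],
    'data': [{'a': {'b': 1}}, {'a': {'b': 2}}],  # hypothetical nested records
}
df = flatten_data(records)
print(df.columns.tolist())  # ['id', 'data.a.b']
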
Code example #16
File: json2csv.py Project: hernamesbarbara/r6stats
def flatten_page(page):
    meta = page['meta']
    players = page['players']
    for player in players:
        try:
            res = nested_to_record(player)
            for k, v in meta.items():
                res['meta.{}'.format(k)] = v
            yield res
        except Exception as err:
            sys.stderr.write(str(err))
            sys.stderr.flush()
            continue
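
A sketch of the page shape flatten_page expects, with hypothetical field names; each yielded row is a flat player record tagged with the page-level meta fields:

page = {
    'meta': {'page': 1, 'total': 2},
    'players': [
        {'username': 'alice', 'stats': {'kills': 10}},
        {'username': 'bob', 'stats': {'kills': 7}},
    ],
}
rows = list(flatten_page(page))
# each row: {'username': ..., 'stats.kills': ..., 'meta.page': 1, 'meta.total': 2}
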
Code example #17
def main():

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fw = flywheel.Client()
        assert fw, "Your Flywheel CLI credentials aren't set!"

    parser = argparse.ArgumentParser(description=(
        "Use this to query Flywheel for the gears available to you, or get the config file for a gear."
    ))

    parser.add_argument("-name",
                        "--gear-name",
                        dest='name',
                        help="Shorthand name of the gear on Flywheel",
                        required=True,
                        default='all')
    parser.add_argument(
        "-config",
        "--output-config",
        dest='config',
        help="True/False; Whether to output configuration file for running",
        required=False,
        default='False')

    args = parser.parse_args()

    config = str2bool(args.config)

    if args.name == 'all':
        gears = fw.gears()
        gears_table = [nested_to_record(g.to_dict(), sep='_') for g in gears]
        df = pd.DataFrame(gears_table)
        df = df.filter(regex=r'gear_label$|gear_name$|^category$', axis=1)
        print(tabulate(
            df,
            headers='keys',
            tablefmt='psql',
        ))

    else:
        gear = find_gear(args.name, fw)
        config_file = collect_gear_config(gear['_id'], fw)
        if config:
            with open('gear_config.json', 'w') as outfile:
                json.dump(config_file, outfile)
            print("Config file written.")
        else:
            print(json.dumps(config_file, indent=4))
Code example #18
    def test_nested_flattens(self):
        data = dict(flat1=1,
                    dict1=dict(c=1, d=2),
                    nested=dict(e=dict(c=1, d=2),
                                d=2))

        result = nested_to_record(data)
        expected = {'dict1.c': 1,
                    'dict1.d': 2,
                    'flat1': 1,
                    'nested.d': 2,
                    'nested.e.c': 1,
                    'nested.e.d': 2}

        assert result == expected
Code example #19
File: test_normalize.py Project: JLL-Benson/CHN_DQ
    def test_nested_flattens(self):
        data = dict(flat1=1,
                    dict1=dict(c=1, d=2),
                    nested=dict(e=dict(c=1, d=2),
                                d=2))

        result = nested_to_record(data)
        expected = {'dict1.c': 1,
                    'dict1.d': 2,
                    'flat1': 1,
                    'nested.d': 2,
                    'nested.e.c': 1,
                    'nested.e.d': 2}

        assert result == expected
Code example #20
 def get(self, cid):
     """Retrieve card for a single contribution.
     ---
     operationId: get_card
     parameters:
         - name: cid
           in: path
           type: string
           pattern: '^[a-f0-9]{24}$'
           required: true
           description: contribution ID (ObjectId)
     responses:
         200:
             description: contribution card
             schema:
                 type: string
     """
     ctx = {'cid': cid}
     mask = ['project', 'identifier', 'content.data']
     contrib = Contributions.objects.only(*mask).get(id=cid)
     info = Projects.objects.get(project=contrib.project)
     ctx['title'] = info.title
     ctx['descriptions'] = info.description.strip().split('.', 1)
     authors = [a.strip() for a in info.authors.split(',') if a]
     ctx['authors'] = {'main': authors[0], 'etal': authors[1:]}
     debug = current_app.config['DEBUG']
     ctx['landing_page'] = f'/{contrib.project}'
     ctx['more'] = f'/explorer/{cid}'
     ctx['urls'] = info.urls.values()
     card_script = get_resource_as_string('templates/linkify.min.js')
     card_script += get_resource_as_string(
         'templates/linkify-element.min.js')
     card_script += get_resource_as_string('templates/card.min.js')
     data = unflatten(
         dict((k.rsplit('.', 1)[0] if k.endswith('.display') else k, v)
              for k, v in nested_to_record(contrib.content.data,
                                           sep='.').items()
              if not k.endswith('.value') and not k.endswith('.unit')))
     browser = get_browser()
     browser.execute_script(card_script, data)
     bs = BeautifulSoup(browser.page_source, 'html.parser')
     ctx['data'] = bs.body.table
     browser.close()
     rendered = html_minify(render_template('card.html', **ctx))
     tree = html.fromstring(rendered)
     inline(tree)
     card = html.tostring(tree.body[0]).decode('utf-8')
     return card
Code example #21
File: test_normalize.py Project: zyazxr/pandas
    def test_nested_flattens(self):
        data = dict(flat1=1,
                    dict1=dict(c=1, d=2),
                    nested=dict(e=dict(c=1, d=2), d=2))

        result = nested_to_record(data)
        expected = {
            "dict1.c": 1,
            "dict1.d": 2,
            "flat1": 1,
            "nested.d": 2,
            "nested.e.c": 1,
            "nested.e.d": 2,
        }

        assert result == expected
Code example #22
File: test_normalize.py Project: zyazxr/pandas
 def test_with_large_max_level(self):
     # GH23843: Enhanced JSON normalize
     max_level = 100
     input_data = [{
         "CreatedBy": {
             "user": {
                 "name": {
                     "firstname": "Leo",
                     "LastName": "Thomson"
                 },
                 "family_tree": {
                     "father": {
                         "name": "Father001",
                         "father": {
                             "Name": "Father002",
                             "father": {
                                 "name": "Father003",
                                 "father": {
                                     "Name": "Father004"
                                 },
                             },
                         },
                     }
                 },
             }
         }
     }]
     expected = [{
         "CreatedBy.user.name.firstname": "Leo",
         "CreatedBy.user.name.LastName": "Thomson",
         "CreatedBy.user.family_tree.father.name": "Father001",
         "CreatedBy.user.family_tree.father.father.Name": "Father002",
         "CreatedBy.user.family_tree.father.father.father.name": "Father003",
         "CreatedBy.user.family_tree.father.father.father.father.Name": "Father004",
     }]
     output = nested_to_record(input_data, max_level=max_level)
     assert output == expected
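
The effect of max_level is to stop recursing below the given depth and keep deeper dicts intact as values; a minimal illustration:

data = {'a': {'b': {'c': 1}}}
nested_to_record(data, max_level=1)  # {'a.b': {'c': 1}}
nested_to_record(data)               # {'a.b.c': 1}
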
Code example #23
def calc_metrics_classification(target, predictions):
    if predictions.shape[-1] == 1:
        predictions = predictions[:, 0]
        predictions = np.array([1 - predictions, predictions]).T

    predict_classes = np.argmax(predictions, axis=-1)
    if len(np.unique(target)) < 4:
        rep = nested_to_record(classification_report(target,
                                                     predict_classes,
                                                     output_dict=True),
                               sep='/')
    else:
        rep = {}
    rep.update({'accuracy': accuracy_score(target, predict_classes)})
    if predictions.shape[-1] == 2:
        rep.update({'roc_auc': roc_auc_score(target, predictions[:, 1])})
        rep.update(
            {"pr_auc": average_precision_score(target, predictions[:, 1])})
    return rep
Code example #24
File: train.py Project: Jsevillamol/ctalearn
def make_combinations(config):
    """
    Generate all possible configurations that a config file specifies via 'multi_' parameters
    If there are no 'multi_' parameters this generator is empty
    """
    flat = nested_to_record(config)
    flat = {tuple(key.split('.')): value for key, value in flat.items()}
    multi_config_flat = {
        key[:-1] + (key[-1][6:], ): value
        for key, value in flat.items() if key[-1].startswith('multi')
    }
    if len(multi_config_flat) == 0:
        return  # if there are no multi params this generator is empty
    keys, values = zip(*multi_config_flat.items())

    # delete the multi_params
    # taken from https://stackoverflow.com/a/49723101/4841832
    def delete_keys_from_dict(dictionary, keys):
        """
        Delete fields in a nested dict
        """
        for key in keys:
            with suppress(KeyError):
                del dictionary[key]
        for value in dictionary.values():
            if isinstance(value, MutableMapping):
                delete_keys_from_dict(value, keys)

    to_delete = ['multi_' + key[-1] for key in multi_config_flat]
    delete_keys_from_dict(config, to_delete)

    for values in itertools.product(*values):
        experiment = dict(zip(keys, values))
        for setting, value in experiment.items():
            pointer_to_inner_dict = reduce(operator.getitem, setting[:-1],
                                           config)
            pointer_to_inner_dict[setting[-1]] = value
        yield config
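
A hedged usage sketch with a hypothetical config (this assumes the function's own imports, itertools, functools.reduce, operator, contextlib.suppress and collections.abc.MutableMapping, are in scope). Note the generator mutates and re-yields the same dict object, so consume each yield before advancing:

config = {'model': {'multi_lr': [0.1, 0.01], 'layers': 2}}
for cfg in make_combinations(config):
    print(cfg)
# {'model': {'layers': 2, 'lr': 0.1}}
# {'model': {'layers': 2, 'lr': 0.01}}
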
Code example #25
def elastic_dsl(client, dsl, index, **kwargs):
    """
    Sends DSL query to elasticsearch and returns the results as a
    :class:`pandas.DataFrame`.

    :param client: Configured elasticsearch client. See :func:`create_elastic_client`
    :type client: :class:`elasticsearch.Elasticsearch`
    :param dsl: Elasticsearch DSL query statement
                See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html  # noqa: E501
    :type dsl: str
    :param index: Index pattern. Usually the same as 'from' part of the SQL
                  See https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html # noqa: E501
    :type index: str

    :param kwargs: Any additional keyword arguments will be passed to the initial
                   :meth:`elasticsearch.Elasticsearch.search` call
    :type kwargs: dict

    :return: results as a :class:`pandas.DataFrame`.
    :rtype: :class:`pandas.DataFrame`
    """
    response = helpers.scan(client=client, query=dsl, index=index, **kwargs)
    data = []
    for row in response:
        # Normalize nested dicts in '_source' such as 'config' or 'git'
        source = nested_to_record(row["_source"]) if "_source" in row else {}

        # Squeeze scalar fields returned as arrays in the response by the search API
        fields = row.get("fields", {})
        fields = {k: v[0] if len(v) == 1 else v for k, v in fields.items()}

        data.append({
            "_index": row["_index"],
            "_type": row["_type"],
            **fields,
            **source,
        })
    return DataFrame(data)
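
A hedged usage sketch against a hypothetical local cluster; the DSL body and index pattern are placeholders:

from elasticsearch import Elasticsearch

client = Elasticsearch('http://localhost:9200')  # hypothetical local node
df = elastic_dsl(client, dsl={'query': {'match_all': {}}}, index='logs-*')
print(df.head())
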
Code example #26
File: test_normalize.py Project: zyazxr/pandas
    def test_donot_drop_nonevalues(self):
        # GH21356
        data = [
            {
                "info": None,
                "author_name": {
                    "first": "Smith",
                    "last_name": "Appleseed"
                }
            },
            {
                "info": {
                    "created_at": "11/08/1993",
                    "last_updated": "26/05/2012"
                },
                "author_name": {
                    "first": "Jane",
                    "last_name": "Doe"
                },
            },
        ]
        result = nested_to_record(data)
        expected = [
            {
                "info": None,
                "author_name.first": "Smith",
                "author_name.last_name": "Appleseed",
            },
            {
                "author_name.first": "Jane",
                "author_name.last_name": "Doe",
                "info.created_at": "11/08/1993",
                "info.last_updated": "26/05/2012",
            },
        ]

        assert result == expected
Code example #27
File: test_normalize.py Project: BobMcFry/pandas
    def test_nonetype_dropping(self):
        # GH20030: Checks that None values are dropped in nested_to_record
        # to prevent additional columns of nans when passed to DataFrame
        data = [
            {'info': None,
             'author_name':
             {'first': 'Smith', 'last_name': 'Appleseed'}
             },
            {'info':
                {'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
             'author_name':
                {'first': 'Jane', 'last_name': 'Doe'}
             }
        ]
        result = nested_to_record(data)
        expected = [
            {'author_name.first': 'Smith',
             'author_name.last_name': 'Appleseed'},
            {'author_name.first': 'Jane',
             'author_name.last_name': 'Doe',
             'info.created_at': '11/08/1993',
             'info.last_updated': '26/05/2012'}]

        assert result == expected
Code example #28
File: test_normalize.py Project: JLL-Benson/CHN_DQ
    def test_donot_drop_nonevalues(self):
        # GH21356
        data = [
            {'info': None,
             'author_name':
             {'first': 'Smith', 'last_name': 'Appleseed'}
             },
            {'info':
                {'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
             'author_name':
                {'first': 'Jane', 'last_name': 'Doe'}
             }
        ]
        result = nested_to_record(data)
        expected = [
            {'info': None,
             'author_name.first': 'Smith',
             'author_name.last_name': 'Appleseed'},
            {'author_name.first': 'Jane',
             'author_name.last_name': 'Doe',
             'info.created_at': '11/08/1993',
             'info.last_updated': '26/05/2012'}]

        assert result == expected
Code example #29
File: test_normalize.py Project: zyazxr/pandas
 def test_with_max_level(self, max_level, expected,
                         max_level_test_input_data):
     # GH23843: Enhanced JSON normalize
     output = nested_to_record(max_level_test_input_data,
                               max_level=max_level)
     assert output == expected
Code example #30
File: utils.py Project: DanIulian/minigrid_rl
def get_experiment_files(experiment_path: str, files: dict = {}, flag=False) \
        -> Tuple[Dict, pd.DataFrame, pd.DataFrame]:

    # Assumes each directory (/ experiment run) has a unique cfg
    cfg_files = glob.glob(f"{experiment_path}/**/cfg.yaml", recursive=True)
    cfg_files = natsort.natsorted(cfg_files)

    cfg_dfs = []

    data = dict()

    join_dfs = dict()
    # -- Load cfgs
    for run_index, cfg_file in enumerate(cfg_files):
        data[run_index] = dict()

        dir_name = os.path.dirname(cfg_file)
        data[run_index]["dir_name"] = dir_name

        run_name = dir_name.replace(experiment_path, "")
        run_name = run_name[1:] if run_name[0] == "/" else run_name
        data[run_index]["dir_name"] = run_name

        # -- Read cfg
        with open(os.path.join(cfg_file)) as handler:
            config_data = yaml.load(handler, Loader=yaml.SafeLoader)

        put_manual_id = False
        if "experiment_id" in config_data:
            experiment_id = config_data["experiment_id"]
        else:
            put_manual_id = True
            experiment_id = config_data["cfg_id"]

        run_id = config_data.get("run_id", 0)

        data[run_index]["experiment_id"] = experiment_id
        data[run_index]["run_id"] = run_id

        if flag:
            cfg_df = pd.DataFrame(nested_to_record(config_data, sep="."),
                                  index=[0])

        else:
            nc = nested_to_record(config_data)
            for k, v in nc.items():
                if isinstance(v, list):
                    nc[k] = np.array(v).astype(object)  # np.object is deprecated
            cfg_df = pd.DataFrame.from_dict(nc, orient="index").transpose()

        cfg_df["run_name"] = run_name
        cfg_df["run_index"] = run_index
        cfg_dfs.append(cfg_df)

        data["cfg"] = cfg_df

        # -- Read logs
        for file_name, file_type in files.items():
            file_path = os.path.join(dir_name, file_name)

            if not os.path.isfile(file_path):
                file_path = None
                continue

            file_data = file_path
            if hasattr(pd, str(file_type)) and file_path is not None:
                # fix for experiments with a bad header

                file_data = getattr(pd, file_type)(file_path)
                if put_manual_id:
                    file_data["experiment_id"] = experiment_id
                    file_data["run_id"] = run_id

                file_data["run_index"] = run_index

                if file_name not in join_dfs:
                    join_dfs[file_name] = []

                join_dfs[file_name].append(file_data)

            data[file_name] = file_data

    cfgs = pd.concat(cfg_dfs)
    merge_dfs = cfgs.copy()

    for join_df_name, join_df in join_dfs.items():
        other_df = pd.concat(join_df, sort=True)
        try:
            try_merge = pd.merge(other_df,
                                 merge_dfs,
                                 how="left",
                                 on="run_index",
                                 sort=True)
            merge_dfs = try_merge
        except:
            print(f"Cannot merge {join_df_name}")

    return data, cfgs, merge_dfs
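
A sketch of calling it: files maps each log filename to the name of a pandas reader (the code does getattr(pd, file_type)), and the experiment path is hypothetical:

data, cfgs, merged = get_experiment_files(
    'results/my_experiment',         # hypothetical experiment directory
    files={'log.csv': 'read_csv'},   # value must name a pandas reader
)
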
Code example #31
def process_query(client, acquisitions, target_cols=None):
    '''
    Extract an acquisition

    This function extracts an acquisition object and collects all imaging files
    and important classification/BIDS information. These data are processed and
    returned as a pandas dataframe that can then be exported

    Parameters
    --------
    client
        A flywheel connection object
    acquisitions
        A list of flywheel acquisition objects
    target_cols
        List of specific columns to return

    Returns
    --------
    return_df
        A dataframe of the result of the query and processing
    '''

    acquisitions_list = []
    for x in tqdm(acquisitions, total=len(acquisitions)):
        try:
            tempacq = client.get(x.id)
            if tempacq is None:
                raise Exception

            d = {
                'acquisition.id': x.id,
                'acquisition.label': x.label,
                'session.id': x.session,
                'session.label': client.get(x.parents.session).label,
                'subject.id': x.parents.subject,
                'subject.label': client.get(x.parents.subject).label,
                'timestamp': x.timestamp
            }

            files = tempacq.files
            files = [f.to_dict() for f in files]
            for f in files:

                f.update(d)

        except Exception as e:
            print(e)
            global NO_DATA
            NO_DATA += 1
            continue
        acquisitions_list.extend(files)

    files_list = [
        nested_to_record(fdict, sep="_") for fdict in acquisitions_list
    ]

    global VERBOSE
    if VERBOSE:
        print("Tidying and returning the results...")
    # filter columns if necessary
    if not target_cols:
        cols = r'(\.label)|(\.id)|(classification)|(^type$)|(^modality$)|(BIDS)|(EchoTime)|(RepetitionTime)|(PhaseEncodingDirection)|(SequenceName)|(SeriesDescription)|(name)'

        # filter the dict keys for the columns names
        files_list = [{k: v
                       for k, v in my_dict.items() if re.search(cols, k)}
                      for my_dict in files_list]
        return_df = pd.DataFrame(files_list)
    else:
        required_cols = [r'\.id', r'\.label', 'name']  # raw strings: regex fragments

        target_cols.extend(required_cols)
        target_cols = "|".join(["({})".format(x) for x in target_cols])

        files_list = [{
            k: v
            for k, v in my_dict.items() if re.search(target_cols, k)
        } for my_dict in files_list]
        return_df = pd.DataFrame(files_list)

    #drop_downs = return_df.apply(is_list_column, 0, reduce=None).values
    #return_df.loc[:, drop_downs] = return_df.loc[:, drop_downs].applymap(unlist_item)
    if 'type' in return_df.columns:
        return_df = return_df[return_df.type.str.contains(
            r'nifti|dicom', na=False)].reset_index(drop=True)
    return (return_df)
Code example #32
    def get(self, project):
        """Retrieve overview graph for a project.
        ---
        operationId: get_graph
        parameters:
            - name: project
              in: path
              type: string
              pattern: '^[a-zA-Z0-9_]{3,30}$'
              required: true
              description: project name/slug
            - name: columns
              in: query
              type: array
              items:
                  type: string
              required: true
              description: comma-separated list of column names to plot (in MongoDB dot notation)
            - name: filters
              in: query
              type: array
              items:
                  type: string
              description: list of `column__operator:value` filters \
                      with `column` in dot notation and `operator` in mongoengine format \
                      (http://docs.mongoengine.org/guide/querying.html#query-operators). \
                      `column` needs to be a valid field in `content.data`.
            - name: page
              in: query
              type: integer
              default: 1
              description: page to retrieve (in batches of `per_page`)
            - name: per_page
              in: query
              type: integer
              default: 200
              minimum: 2
              maximum: 200
              description: number of results to return per page
        responses:
            200:
                description: x-y-data in plotly format
                schema:
                    type: array
                    items:
                        type: object
                        properties:
                            x:
                                type: array
                                items:
                                    type: number
                            y:
                                type: array
                                items:
                                    type: number
        """
        mask = ['content.data', 'identifier']
        columns = request.args.get('columns').split(',')
        filters = request.args.get('filters', '').split(',')
        page = int(request.args.get('page', 1))
        PER_PAGE_MAX = 200
        per_page = int(request.args.get('per_page', PER_PAGE_MAX))
        per_page = PER_PAGE_MAX if per_page > PER_PAGE_MAX else per_page

        with no_dereference(Contributions) as ContributionsDeref:
            objects = ContributionsDeref.objects(project=project).only(*mask)
            data = [{'x': [], 'y': [], 'text': []} for col in columns]
            # C__gte:0.42,C__lte:2.10,ΔE-QP.direct__lte:11.3 -> content__data__C__value__lte
            if filters:
                query = {}
                for f in filters:
                    if '__' in f and ':' in f:
                        k, v = f.split(':')
                        col, op = k.rsplit('__', 1)
                        col = col.replace(".", "__")
                        key = f'content__data__{col}__value__{op}'
                        query[key] = float(v)
                objects = objects(**query)

            for obj in objects.paginate(page=page, per_page=per_page).items:
                d = nested_to_record(obj['content']['data'], sep='.')
                if all(f'{c}.display' in d.keys() for c in columns):
                    for idx, col in enumerate(columns):
                        val = d.get(f'{col}.display')
                        if val:
                            data[idx]['x'].append(obj.identifier)
                            data[idx]['y'].append(val.split(' ')[0])
                            data[idx]['text'].append(str(obj.id))
            return data
Code example #33
    def get(self, project):
        """Retrieve a table of contributions for a project.
        ---
        operationId: get_table
        parameters:
            - name: project
              in: path
              type: string
              pattern: '^[a-zA-Z0-9_]{3,30}$'
              required: true
              description: project name/slug
            - name: columns
              in: query
              type: array
              items:
                  type: string
              description: comma-separated list of column names to tabulate
            - name: page
              in: query
              type: integer
              default: 1
              description: page to retrieve (in batches of `per_page`)
            - name: per_page
              in: query
              type: integer
              default: 20
              minimum: 2
              maximum: 20
              description: number of results to return per page
            - name: q
              in: query
              type: string
              description: substring to search for in first non-id column
            - name: order
              in: query
              type: string
              description: sort ascending or descending
              enum: [asc, desc]
            - name: sort_by
              in: query
              type: string
              description: column name to sort by
        responses:
            200:
                description: Paginated table response in backgrid format (items = rows of table)
                schema:
                    type: object
                    properties:
                        total_count:
                            type: integer
                        total_pages:
                            type: integer
                        page:
                            type: integer
                        last_page:
                            type: integer
                        per_page:
                            type: integer
                        items:
                            type: array
                            items:
                                type: object
        """
        # config and parameters
        explorer = 'http://localhost:8080/explorer' if current_app.config['DEBUG'] \
            else 'https://portal.mpcontribs.org/explorer'
        mp_site = 'https://materialsproject.org/materials'
        mask = ['content.data', 'content.structures', 'identifier']
        search = request.args.get('q')
        page = int(request.args.get('page', 1))
        PER_PAGE_MAX = current_app.config['PER_PAGE_MAX']
        per_page = int(request.args.get('per_page', PER_PAGE_MAX))
        per_page = PER_PAGE_MAX if per_page > PER_PAGE_MAX else per_page
        order = request.args.get('order')
        sort_by = request.args.get('sort_by', 'identifier')
        general_columns = ['identifier', 'id']
        user_columns = request.args.get('columns', '').split(',')
        objects = Contributions.objects(project=project).only(*mask)

        # default user_columns
        sample = objects.first()['content']['data']
        data_keys = sorted(
            list(
                k.rsplit('.', 1)[0] if k.endswith('.display') else k
                for k, v in nested_to_record(sample, sep='.').items()
                if not k.endswith('.value') and not k.endswith('.unit')))
        if not data_keys:
            return {
                'total_count': 0,
                'total_pages': 0,
                'page': 1,
                'last_page': 1,
                'per_page': per_page,
                'items': []
            }
        formula_key_exists = bool('formula' in data_keys)
        if formula_key_exists:
            general_columns.append('formula')
        else:
            # test whether search key exists in all docs and is not a number/object
            search_key = data_keys[0].replace('.', '__')
            q1 = {f'content__data__{search_key}__exists': False}
            q2 = {f'content__data__{search_key}__type': 'object'}
            if objects(Q(**q1) | Q(**q2)).count() < 1:
                general_columns.append(data_keys[0])
            else:
                general_columns.append('formula')

        if not user_columns[0]:
            if formula_key_exists:
                data_keys.remove('formula')
            user_columns = data_keys if 'formula' in general_columns else data_keys[
                1:]

        # add units to column names
        units = [
            objects.distinct(f'content.data.{col}.unit')
            for col in user_columns
        ]
        columns = general_columns + [
            '{} [{}]'.format(col, units[idx][0]) if units[idx] else col
            for idx, col in enumerate(user_columns)
        ]

        # search and sort
        if search is not None:
            kwargs = {
                f'content__data__{general_columns[-1]}__exists': True,
                f'content__data__{general_columns[-1]}__contains': search
            }
            objects = objects(Q(identifier__contains=search) | Q(**kwargs))
        sort_by_key = sort_by
        if ' ' in sort_by and sort_by[-1] == ']':
            sort_by = sort_by.split(' ')[0]  # remove unit
            sort_by_key = f'content.data.{sort_by}.value'
        elif sort_by in columns[2:]:
            sort_by_key = f'content.data.{sort_by}'
        order_sign = '-' if order == 'desc' else '+'
        order_by = f"{order_sign}{sort_by_key}"
        objects = objects.order_by(order_by)

        # generate table page
        items = []
        for doc in objects.paginate(page=page, per_page=per_page).items:
            mp_id = doc['identifier']
            contrib = nested_to_record(doc['content']['data'], sep='.')
            search_value = contrib.get(general_columns[-1],
                                       mp_id).replace(' ', '')
            row = [
                f"{mp_site}/{mp_id}", f"{explorer}/{doc['id']}", search_value
            ]

            for idx, col in enumerate(user_columns):
                cell = ''
                if 'CIF' in col:
                    structures = doc['content']['structures']
                    if '.' in col:  # grouped columns
                        sname = '.'.join(col.split(
                            '.')[:-1])  # remove CIF string from field name
                        for d in structures:
                            if d['name'] == sname:
                                cell = f"{explorer}/{d['id']}.cif"
                                break
                    elif structures:
                        cell = f"{explorer}/{structures[0]['id']}.cif"
                else:
                    cell = contrib.get(col + '.value', contrib.get(col, ''))
                row.append(str(cell))

            items.append(dict(zip(columns, row)))

        total_count = objects.count()
        total_pages = total_count // per_page
        if total_count % per_page:
            total_pages += 1

        return {
            'total_count': total_count,
            'total_pages': total_pages,
            'page': page,
            'last_page': total_pages,
            'per_page': per_page,
            'items': items
        }
Code example #34
def get_experiment_files(experiment_path: str, files: dict = {}):

    # Assumes each directory (/ experiment run) has a unique cfg
    cfg_files = glob.glob(f"{experiment_path}/**/cfg.yaml", recursive=True)
    cfg_files = natsort.natsorted(cfg_files)

    cfg_dfs = []

    data = dict()

    join_dfs = dict()
    # -- Load cfgs
    for run_index, cfg_file in enumerate(cfg_files):
        data[run_index] = dict()

        dir_name = os.path.dirname(cfg_file)
        data[run_index]["dir_name"] = dir_name

        run_name = dir_name.replace(experiment_path, "")
        run_name = run_name[1:] if run_name[0] == "/" else run_name
        data[run_index]["dir_name"] = run_name

        # -- Read cfg
        with open(os.path.join(cfg_file)) as handler:
            config_data = yaml.load(handler, Loader=yaml.SafeLoader)

        put_manual_id = False
        if "experiment_id" in config_data:
            experiment_id = config_data["experiment_id"]
        else:
            put_manual_id = True
            experiment_id = config_data["cfg_id"]
        run_id = config_data.get("run_id", 0)  # config_data is a dict, so use .get, not getattr

        data[run_index]["experiment_id"] = experiment_id
        data[run_index]["run_id"] = run_id

        #cfg_df = pd.DataFrame(nested_to_record(config_data, sep="."), index=[0])
        cfg_df = pd.DataFrame(nested_to_record(config_data))
        cfg_df["run_name"] = run_name
        cfg_df["run_index"] = run_index
        cfg_dfs.append(cfg_df)

        data["cfg"] = cfg_df

        # -- Read logs
        for file_name, file_type in files.items():
            file_path = os.path.join(dir_name, file_name)

            if not os.path.isfile(file_path):
                file_path = None
                continue

            file_data = file_path
            if hasattr(pd, str(file_type)) and file_path is not None:
                # fix for experiments with a bad header
                # file_data = getattr(pd, file_type)(file_path, skiprows=1, names=['update', 'frames', 'FPS', 'duration', 'rreturn_mean', 'rreturn_std', 'rreturn_min', 'rreturn_max', 'num_frames_mean', 'num_frames_std', 'num_frames_min', 'num_frames_max', 'entropy', 'value', 'policy_loss', 'value_loss', 'grad_norm', 'value_ext', 'value_int', 'value_ext_loss', 'value_int_loss', 'return_mean', 'return_std', 'return_min', 'return_max'])
                file_data = getattr(pd, file_type)(file_path)
                if put_manual_id:
                    file_data["experiment_id"] = experiment_id
                    file_data["run_id"] = run_id

                file_data["run_index"] = run_index

                if file_name not in join_dfs:
                    join_dfs[file_name] = []

                join_dfs[file_name].append(file_data)

            data[file_name] = file_data

    cfgs = pd.concat(cfg_dfs)
    merge_dfs = cfgs.copy()

    for join_df_name, join_df in join_dfs.items():
        other_df = pd.concat(join_df, sort=True)
        try:
            try_merge = pd.merge(other_df,
                                 merge_dfs,
                                 how="left",
                                 on="run_index",
                                 sort=True)
            merge_dfs = try_merge
        except:
            print(f"Cannot merge {join_df_name}")

    return data, cfgs, merge_dfs